epic/TD-927/progressor-prototype (#1)

* TD-934: add prototype implementation
* TD-934: fix
* TD-934: fix spec
* TD-934: fix spec
* TD-927: add context
* TD-927: fix json saving
* TD-927: fix
* TD-927: fix calls processing
* TD-927: fix call and repair processing, add remove action impl.
* TD-934: add kafka client
* XP-934: bump epg_connector
* TD-927: fix for table names
* XP-927: add common tests
* TD-927: reworks
* TD-927: cleanup
* TD-927: fix pg_backend
* TD-927: fix calls processing
* TD-927: add benchmark
* TD-927: bump epg_connector
* TD-927: fixes
* TD-927: fix timers saving
* TD-927: bump epg_connector
* XP-927: fix bench config
* TD-927: fix worker
* TD-927: cleanup
* TD-927: add logs
* TD-927: add logs
* TD-927: add logs
* TD-927: add metrics
* TD-927: fix scheduler pop task
* TD-927: rework pools
* TD-927: some optimizations
* TD-927: rework pg backend
* TD-927: update test config
* TD-927: bump epg_connector, cleanup

---------

Co-authored-by: ttt161 <losto@nix>
This commit is contained in:
parent 6be3284926
commit 79f16f9cc3
.env (new file, 5 lines)
@@ -0,0 +1,5 @@
SERVICE_NAME=progressor
OTP_VERSION=25.3
REBAR_VERSION=3.18
THRIFT_VERSION=0.14.2.3
CONFLUENT_PLATFORM_VERSION=5.1.2
.gitignore (vendored, new file, 20 lines)
@@ -0,0 +1,20 @@
.rebar3
_*
.eunit
*.o
*.beam
*.plt
*.swp
*.swo
.erlang.cookie
ebin
log
erl_crash.dump
.rebar
logs
_build
.idea
*.iml
rebar3.crashdump
*~
benchmark/base_bench/rebar.lock
.image.dev (new file, empty)
Dockerfile.dev (new file, 17 lines)
@@ -0,0 +1,17 @@
ARG OTP_VERSION

FROM docker.io/library/erlang:${OTP_VERSION}
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install thrift compiler
ARG THRIFT_VERSION
ARG TARGETARCH=amd64
RUN wget -q -O- "https://github.com/valitydev/thrift/releases/download/${THRIFT_VERSION}/thrift-${THRIFT_VERSION}-linux-${TARGETARCH}.tar.gz" \
    | tar -xvz -C /usr/local/bin/

# Set env
ENV CHARSET=UTF-8
ENV LANG=C.UTF-8

# Set runtime
CMD ["/bin/bash"]
Makefile (new file, 119 lines)
@@ -0,0 +1,119 @@
# HINT
# Use Makefile.env to override the variables below.
# For example, to run with podman, put `DOCKER=podman` there.
-include Makefile.env

# NOTE
# Variables specified in `.env` file are used to pick and setup specific
# component versions, both when building a development image and when running
# CI workflows on GH Actions. This ensures that tasks run with `wc-` prefix
# (like `wc-dialyze`) are reproducible between local machine and CI runners.
DOTENV := $(shell grep -v '^\#' .env)

# Development images
DEV_IMAGE_TAG = $(TEST_CONTAINER_NAME)-dev
DEV_IMAGE_ID = $(file < .image.dev)

DOCKER ?= docker
DOCKERCOMPOSE ?= docker-compose
DOCKERCOMPOSE_W_ENV = DEV_IMAGE_TAG=$(DEV_IMAGE_TAG) $(DOCKERCOMPOSE) -f docker-compose.yml
REBAR ?= rebar3
TEST_CONTAINER_NAME ?= testrunner

all: compile

.PHONY: dev-image clean-dev-image wc-shell test

dev-image: .image.dev

get-submodules:
	git submodule init
	git submodule update

.image.dev: get-submodules Dockerfile.dev .env
	env $(DOTENV) $(DOCKERCOMPOSE_W_ENV) build $(TEST_CONTAINER_NAME)
	$(DOCKER) image ls -q -f "reference=$(DEV_IMAGE_ID)" | head -n1 > $@

clean-dev-image:
ifneq ($(DEV_IMAGE_ID),)
	$(DOCKER) image rm -f $(DEV_IMAGE_TAG)
	rm .image.dev
endif

DOCKER_WC_OPTIONS := -v $(PWD):$(PWD) --workdir $(PWD)
DOCKER_WC_EXTRA_OPTIONS ?= --rm
DOCKER_RUN = $(DOCKER) run -t $(DOCKER_WC_OPTIONS) $(DOCKER_WC_EXTRA_OPTIONS)

DOCKERCOMPOSE_RUN = $(DOCKERCOMPOSE_W_ENV) run --rm $(DOCKER_WC_OPTIONS)

# Utility tasks

wc-shell: dev-image
	$(DOCKER_RUN) --interactive --tty $(DEV_IMAGE_TAG)

wc-%: dev-image
	$(DOCKER_RUN) $(DEV_IMAGE_TAG) make $*

wdeps-shell: dev-image
	$(DOCKERCOMPOSE_RUN) $(TEST_CONTAINER_NAME) su; \
	$(DOCKERCOMPOSE_W_ENV) down

wdeps-%: dev-image
	$(DOCKERCOMPOSE_RUN) -T $(TEST_CONTAINER_NAME) make $(if $(MAKE_ARGS),$(MAKE_ARGS) $*,$*); \
	res=$$?; \
	$(DOCKERCOMPOSE_W_ENV) down; \
	exit $$res

# Submodules tasks

make_psql_migration:
	make -C psql-migration/
	mkdir -p bin
	mkdir -p migrations
	cp ./psql-migration/_build/default/bin/psql_migration ./bin

# Rebar tasks

rebar-shell:
	$(REBAR) shell

compile:
	$(REBAR) compile

xref:
	$(REBAR) xref

lint:
	$(REBAR) lint

check-format:
	$(REBAR) fmt -c

dialyze:
	$(REBAR) as test dialyzer

release:
	$(REBAR) as prod release

eunit:
	$(REBAR) eunit --cover

common-test:
	$(REBAR) ct --cover

cover:
	$(REBAR) covertool generate

format:
	$(REBAR) fmt -w

clean:
	$(REBAR) clean

distclean: clean-dev-image
	rm -rf _build

test: eunit common-test

cover-report:
	$(REBAR) cover
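Taken together, the pattern rules above mean any rebar task can run containerized: `make wc-compile` executes `make compile` inside the dev image, while `make wdeps-test` additionally brings up the docker-compose dependencies (postgres, kafka) and tears them down afterwards.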
README.md (changed, 76 lines)
@@ -1 +1,77 @@
# progressor

## Usage
TODO
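Until this section is written, a minimal session can be pieced together from the benchmark sources in this commit (`benchmark/base_bench/src/base_bench.erl`); the namespace, argument shapes, and return values below are assumptions taken from that code rather than a documented API:

```
%% Sketch only: assumes a namespace `default` configured with a processor
%% module, as in benchmark/base_bench/config/sys.config.
1> application:ensure_all_started(progressor).
2> progressor:init(#{ns => default, id => <<"proc-1">>, args => term_to_binary(60)}).
3> progressor:call(#{ns => default, id => <<"proc-1">>, args => <<>>}).
%% call/1 returns {ok, Response} | {error, Reason} (see do_call/2 in base_bench.erl).
```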

## Observability

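A sample Prometheus scrape follows (the `prometheus` application with the `default` collector is enabled in `config/sys.config`):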
# HELP progressor_notification_duration_ms Notification durations in millisecond
progressor_notification_duration_ms_bucket{service="hellgate",namespace="default",notification_type="lifecycle_sink",le="10"} 9311
progressor_notification_duration_ms_bucket{service="hellgate",namespace="default",notification_type="lifecycle_sink",le="50"} 9311
progressor_notification_duration_ms_bucket{service="hellgate",namespace="default",notification_type="lifecycle_sink",le="150"} 9311
progressor_notification_duration_ms_bucket{service="hellgate",namespace="default",notification_type="lifecycle_sink",le="300"} 9311
progressor_notification_duration_ms_bucket{service="hellgate",namespace="default",notification_type="lifecycle_sink",le="500"} 9311
progressor_notification_duration_ms_bucket{service="hellgate",namespace="default",notification_type="lifecycle_sink",le="1000"} 9311
progressor_notification_duration_ms_bucket{service="hellgate",namespace="default",notification_type="lifecycle_sink",le="+Inf"} 9311
progressor_notification_duration_ms_count{service="hellgate",namespace="default",notification_type="lifecycle_sink"} 9311
progressor_notification_duration_ms_sum{service="hellgate",namespace="default",notification_type="lifecycle_sink"} 60
progressor_notification_duration_ms_bucket{service="hellgate",namespace="default",notification_type="event_sink",le="10"} 9311
progressor_notification_duration_ms_bucket{service="hellgate",namespace="default",notification_type="event_sink",le="50"} 9311
progressor_notification_duration_ms_bucket{service="hellgate",namespace="default",notification_type="event_sink",le="150"} 9311
progressor_notification_duration_ms_bucket{service="hellgate",namespace="default",notification_type="event_sink",le="300"} 9311
progressor_notification_duration_ms_bucket{service="hellgate",namespace="default",notification_type="event_sink",le="500"} 9311
progressor_notification_duration_ms_bucket{service="hellgate",namespace="default",notification_type="event_sink",le="1000"} 9311
progressor_notification_duration_ms_bucket{service="hellgate",namespace="default",notification_type="event_sink",le="+Inf"} 9311
progressor_notification_duration_ms_count{service="hellgate",namespace="default",notification_type="event_sink"} 9311
progressor_notification_duration_ms_sum{service="hellgate",namespace="default",notification_type="event_sink"} 19
# TYPE progressor_task_completion_duration_ms histogram
# HELP progressor_task_completion_duration_ms Task completion durations in millisecond
progressor_task_completion_duration_ms_bucket{service="hellgate",namespace="default",completion_type="complete_and_suspend",le="50"} 26
progressor_task_completion_duration_ms_bucket{service="hellgate",namespace="default",completion_type="complete_and_suspend",le="150"} 32
progressor_task_completion_duration_ms_bucket{service="hellgate",namespace="default",completion_type="complete_and_suspend",le="300"} 50
progressor_task_completion_duration_ms_bucket{service="hellgate",namespace="default",completion_type="complete_and_suspend",le="500"} 50
progressor_task_completion_duration_ms_bucket{service="hellgate",namespace="default",completion_type="complete_and_suspend",le="750"} 50
progressor_task_completion_duration_ms_bucket{service="hellgate",namespace="default",completion_type="complete_and_suspend",le="1000"} 50
progressor_task_completion_duration_ms_bucket{service="hellgate",namespace="default",completion_type="complete_and_suspend",le="+Inf"} 50
progressor_task_completion_duration_ms_count{service="hellgate",namespace="default",completion_type="complete_and_suspend"} 50
progressor_task_completion_duration_ms_sum{service="hellgate",namespace="default",completion_type="complete_and_suspend"} 5100
progressor_task_completion_duration_ms_bucket{service="hellgate",namespace="default",completion_type="complete_and_continue",le="50"} 298
progressor_task_completion_duration_ms_bucket{service="hellgate",namespace="default",completion_type="complete_and_continue",le="150"} 4596
progressor_task_completion_duration_ms_bucket{service="hellgate",namespace="default",completion_type="complete_and_continue",le="300"} 8926
progressor_task_completion_duration_ms_bucket{service="hellgate",namespace="default",completion_type="complete_and_continue",le="500"} 9258
progressor_task_completion_duration_ms_bucket{service="hellgate",namespace="default",completion_type="complete_and_continue",le="750"} 9261
progressor_task_completion_duration_ms_bucket{service="hellgate",namespace="default",completion_type="complete_and_continue",le="1000"} 9261
progressor_task_completion_duration_ms_bucket{service="hellgate",namespace="default",completion_type="complete_and_continue",le="+Inf"} 9261
progressor_task_completion_duration_ms_count{service="hellgate",namespace="default",completion_type="complete_and_continue"} 9261
progressor_task_completion_duration_ms_sum{service="hellgate",namespace="default",completion_type="complete_and_continue"} 1473582
# TYPE progressor_process_removing_duration_ms histogram
# HELP progressor_process_removing_duration_ms Task completion durations in millisecond
# TYPE progressor_task_processing_duration_ms histogram
# HELP progressor_task_processing_duration_ms Task processing durations in millisecond
progressor_task_processing_duration_ms_bucket{service="hellgate",namespace="default",task_type="timeout",le="50"} 9261
progressor_task_processing_duration_ms_bucket{service="hellgate",namespace="default",task_type="timeout",le="150"} 9261
progressor_task_processing_duration_ms_bucket{service="hellgate",namespace="default",task_type="timeout",le="300"} 9261
progressor_task_processing_duration_ms_bucket{service="hellgate",namespace="default",task_type="timeout",le="500"} 9261
progressor_task_processing_duration_ms_bucket{service="hellgate",namespace="default",task_type="timeout",le="750"} 9261
progressor_task_processing_duration_ms_bucket{service="hellgate",namespace="default",task_type="timeout",le="1000"} 9261
progressor_task_processing_duration_ms_bucket{service="hellgate",namespace="default",task_type="timeout",le="+Inf"} 9261
progressor_task_processing_duration_ms_count{service="hellgate",namespace="default",task_type="timeout"} 9261
progressor_task_processing_duration_ms_sum{service="hellgate",namespace="default",task_type="timeout"} 176
progressor_task_processing_duration_ms_bucket{service="hellgate",namespace="default",task_type="init",le="50"} 25
progressor_task_processing_duration_ms_bucket{service="hellgate",namespace="default",task_type="init",le="150"} 25
progressor_task_processing_duration_ms_bucket{service="hellgate",namespace="default",task_type="init",le="300"} 25
progressor_task_processing_duration_ms_bucket{service="hellgate",namespace="default",task_type="init",le="500"} 25
progressor_task_processing_duration_ms_bucket{service="hellgate",namespace="default",task_type="init",le="750"} 25
progressor_task_processing_duration_ms_bucket{service="hellgate",namespace="default",task_type="init",le="1000"} 25
progressor_task_processing_duration_ms_bucket{service="hellgate",namespace="default",task_type="init",le="+Inf"} 25
progressor_task_processing_duration_ms_count{service="hellgate",namespace="default",task_type="init"} 25
progressor_task_processing_duration_ms_sum{service="hellgate",namespace="default",task_type="init"} 0
progressor_task_processing_duration_ms_bucket{service="hellgate",namespace="default",task_type="call",le="50"} 25
progressor_task_processing_duration_ms_bucket{service="hellgate",namespace="default",task_type="call",le="150"} 25
progressor_task_processing_duration_ms_bucket{service="hellgate",namespace="default",task_type="call",le="300"} 25
progressor_task_processing_duration_ms_bucket{service="hellgate",namespace="default",task_type="call",le="500"} 25
progressor_task_processing_duration_ms_bucket{service="hellgate",namespace="default",task_type="call",le="750"} 25
progressor_task_processing_duration_ms_bucket{service="hellgate",namespace="default",task_type="call",le="1000"} 25
progressor_task_processing_duration_ms_bucket{service="hellgate",namespace="default",task_type="call",le="+Inf"} 25
progressor_task_processing_duration_ms_count{service="hellgate",namespace="default",task_type="call"} 25
progressor_task_processing_duration_ms_sum{service="hellgate",namespace="default",task_type="call"} 1
benchmark/base_bench/.env (new file, 5 lines)
@@ -0,0 +1,5 @@
SERVICE_NAME=progressor
OTP_VERSION=25.3
REBAR_VERSION=3.18
THRIFT_VERSION=0.14.2.3
CONFLUENT_PLATFORM_VERSION=5.1.2
benchmark/base_bench/.gitignore (vendored, new file, 19 lines)
@@ -0,0 +1,19 @@
.rebar3
_*
.eunit
*.o
*.beam
*.plt
*.swp
*.swo
.erlang.cookie
ebin
log
erl_crash.dump
.rebar
logs
_build
.idea
*.iml
rebar3.crashdump
*~
benchmark/base_bench/.image.dev (new file, empty)
benchmark/base_bench/Dockerfile.dev (new file, 17 lines)
@@ -0,0 +1,17 @@
ARG OTP_VERSION

FROM docker.io/library/erlang:${OTP_VERSION}
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install thrift compiler
ARG THRIFT_VERSION
ARG TARGETARCH=amd64
RUN wget -q -O- "https://github.com/valitydev/thrift/releases/download/${THRIFT_VERSION}/thrift-${THRIFT_VERSION}-linux-${TARGETARCH}.tar.gz" \
    | tar -xvz -C /usr/local/bin/

# Set env
ENV CHARSET=UTF-8
ENV LANG=C.UTF-8

# Set runtime
CMD ["/bin/bash"]
benchmark/base_bench/LICENSE (new file, 191 lines)
@@ -0,0 +1,191 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   Copyright 2024, Anonymous <anonymous@example.org>.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
benchmark/base_bench/Makefile (new file, 119 lines; identical to the root Makefile)
@@ -0,0 +1,119 @@
# HINT
# Use Makefile.env to override the variables below.
# For example, to run with podman, put `DOCKER=podman` there.
-include Makefile.env

# NOTE
# Variables specified in `.env` file are used to pick and setup specific
# component versions, both when building a development image and when running
# CI workflows on GH Actions. This ensures that tasks run with `wc-` prefix
# (like `wc-dialyze`) are reproducible between local machine and CI runners.
DOTENV := $(shell grep -v '^\#' .env)

# Development images
DEV_IMAGE_TAG = $(TEST_CONTAINER_NAME)-dev
DEV_IMAGE_ID = $(file < .image.dev)

DOCKER ?= docker
DOCKERCOMPOSE ?= docker-compose
DOCKERCOMPOSE_W_ENV = DEV_IMAGE_TAG=$(DEV_IMAGE_TAG) $(DOCKERCOMPOSE) -f docker-compose.yml
REBAR ?= rebar3
TEST_CONTAINER_NAME ?= testrunner

all: compile

.PHONY: dev-image clean-dev-image wc-shell test

dev-image: .image.dev

get-submodules:
	git submodule init
	git submodule update

.image.dev: get-submodules Dockerfile.dev .env
	env $(DOTENV) $(DOCKERCOMPOSE_W_ENV) build $(TEST_CONTAINER_NAME)
	$(DOCKER) image ls -q -f "reference=$(DEV_IMAGE_ID)" | head -n1 > $@

clean-dev-image:
ifneq ($(DEV_IMAGE_ID),)
	$(DOCKER) image rm -f $(DEV_IMAGE_TAG)
	rm .image.dev
endif

DOCKER_WC_OPTIONS := -v $(PWD):$(PWD) --workdir $(PWD)
DOCKER_WC_EXTRA_OPTIONS ?= --rm
DOCKER_RUN = $(DOCKER) run -t $(DOCKER_WC_OPTIONS) $(DOCKER_WC_EXTRA_OPTIONS)

DOCKERCOMPOSE_RUN = $(DOCKERCOMPOSE_W_ENV) run --rm $(DOCKER_WC_OPTIONS)

# Utility tasks

wc-shell: dev-image
	$(DOCKER_RUN) --interactive --tty $(DEV_IMAGE_TAG)

wc-%: dev-image
	$(DOCKER_RUN) $(DEV_IMAGE_TAG) make $*

wdeps-shell: dev-image
	$(DOCKERCOMPOSE_RUN) $(TEST_CONTAINER_NAME) su; \
	$(DOCKERCOMPOSE_W_ENV) down

wdeps-%: dev-image
	$(DOCKERCOMPOSE_RUN) -T $(TEST_CONTAINER_NAME) make $(if $(MAKE_ARGS),$(MAKE_ARGS) $*,$*); \
	res=$$?; \
	$(DOCKERCOMPOSE_W_ENV) down; \
	exit $$res

# Submodules tasks

make_psql_migration:
	make -C psql-migration/
	mkdir -p bin
	mkdir -p migrations
	cp ./psql-migration/_build/default/bin/psql_migration ./bin

# Rebar tasks

rebar-shell:
	$(REBAR) shell

compile:
	$(REBAR) compile

xref:
	$(REBAR) xref

lint:
	$(REBAR) lint

check-format:
	$(REBAR) fmt -c

dialyze:
	$(REBAR) as test dialyzer

release:
	$(REBAR) as prod release

eunit:
	$(REBAR) eunit --cover

common-test:
	$(REBAR) ct --cover

cover:
	$(REBAR) covertool generate

format:
	$(REBAR) fmt -w

clean:
	$(REBAR) clean

distclean: clean-dev-image
	rm -rf _build

test: eunit common-test

cover-report:
	$(REBAR) cover
benchmark/base_bench/README.md (new file, 8 lines)
@@ -0,0 +1,8 @@
base_bench
=====

```
$ rebar3 shell

1> base_bench:start(ProcessCount, Duration).
```
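For example, `base_bench:start(100, 60).` starts 100 progressor processes and, after 60 seconds, prints the min/max/average event counts per process, the overall event rate, and the error count (see `src/base_bench.erl`).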
benchmark/base_bench/config/sys.config (new file, 55 lines)
@@ -0,0 +1,55 @@
[

    {progressor, [
        {defaults, #{
            storage => #{
                client => prg_pg_backend,
                options => #{
                    pool => default_pool
                }
            },
            retry_policy => #{
                initial_timeout => 3,
                backoff_coefficient => 1.2,
                max_timeout => 180,
                max_attempts => 2,
                non_retryable_errors => []
            },
            task_scan_timeout => 15, %% seconds
            worker_pool_size => 200,
            process_step_timeout => 30 %% seconds
        }},

        {namespaces, #{
            default => #{
                processor => #{
                    client => base_bench_processor,
                    options => #{}
                }
            }
        }}
    ]},

    {epg_connector, [
        {databases, #{
            progressor_db => #{
                host => "postgres",
                port => 5432,
                database => "progressor_db",
                username => "progressor",
                password => "progressor"
            }
        }},
        {pools, #{
            default_pool => #{
                database => progressor_db,
                size => 200
            }
        }}
    ]},

    {prometheus, [
        {collectors, [default]}
    ]}

].
benchmark/base_bench/docker-compose.yml (new file, 46 lines)
@@ -0,0 +1,46 @@
services:
  testrunner:
    image: $DEV_IMAGE_TAG
    environment:
      WORK_DIR: $PWD
    build:
      dockerfile: Dockerfile.dev
      context: .
      args:
        OTP_VERSION: $OTP_VERSION
        THRIFT_VERSION: $THRIFT_VERSION
    volumes:
      - .:$PWD
    hostname: progressor
    depends_on:
      postgres:
        condition: service_healthy
    working_dir: $PWD

  postgres:
    image: postgres:15-bookworm
    command: -c 'max_connections=250'
    environment:
      POSTGRES_DB: "progressor_db"
      POSTGRES_USER: "progressor"
      POSTGRES_PASSWORD: "progressor"
      PGDATA: "/tmp/postgresql/data/pgdata"
    volumes:
      - progressor-data:/tmp/postgresql/data
    ports:
      - "5432:5432"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U progressor -d progressor_db"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 10s
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '4'
          memory: 8G

volumes:
  progressor-data:
benchmark/base_bench/rebar.config (new file, 9 lines)
@@ -0,0 +1,9 @@
{erl_opts, [debug_info]}.
{deps, [
    {progressor, {git, "https://github.com/valitydev/progressor.git", {branch, "epic/TD-927/progressor-prototype"}}}
]}.

{shell, [
    {config, "config/sys.config"},
    {apps, [base_bench, progressor]}
]}.
benchmark/base_bench/src/base_bench.app.src (new file, 17 lines)
@@ -0,0 +1,17 @@
{application, base_bench,
 [{description, "An OTP application"},
  {vsn, "0.1.0"},
  {registered, []},
  {mod, {base_bench_app, []}},
  {applications,
   [kernel,
    stdlib,
    herd,
    progressor
   ]},
  {env, []},
  {modules, []},

  {licenses, ["MIT"]},
  {links, []}
 ]}.
benchmark/base_bench/src/base_bench.erl (new file, 83 lines)
@@ -0,0 +1,83 @@
-module(base_bench).

-behaviour(gen_server).

-export([start/2]).
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
    code_change/3]).

-define(SERVER, ?MODULE).
-define(NS, default).

-record(base_bench_state, {ids, duration}).

%%%===================================================================
%%% Spawning and gen_server implementation
%%%===================================================================

start(ProcCount, DurationSec) ->
    gen_server:start_link({local, ?SERVER}, ?MODULE, [ProcCount, DurationSec], []).

init([ProcCount, DurationSec]) ->
    IDs = start_processes(ProcCount, DurationSec),
    io:format(user, "Started: ~p~n", [calendar:system_time_to_rfc3339(erlang:system_time(second))]),
    erlang:start_timer(DurationSec * 1000, self(), finish),
    {ok, #base_bench_state{ids = IDs, duration = DurationSec}}.

handle_call(_Request, _From, State = #base_bench_state{}) ->
    {reply, ok, State}.

handle_cast(_Request, State = #base_bench_state{}) ->
    {noreply, State}.

handle_info({timeout, _TimerRef, finish}, State = #base_bench_state{ids = IDs, duration = Duration}) ->
    io:format(user, "Stopping: ~p~n", [calendar:system_time_to_rfc3339(erlang:system_time(second))]),
    {EvCountsList, ErrCount} = stop_processes(IDs),
    io:format(user, "Finish: ~p~n", [calendar:system_time_to_rfc3339(erlang:system_time(second))]),
    Max = lists:max(EvCountsList),
    Min = lists:min(EvCountsList),
    Avg = lists:sum(EvCountsList) / erlang:length(EvCountsList),
    Rate = lists:sum(EvCountsList) / Duration,
    io:format(user, "Max: ~p~nMin: ~p~nAvg: ~p~nRate: ~p~nErrors: ~p~n", [Max, Min, Avg, Rate, ErrCount]),
    {noreply, State};
handle_info(_Info, State = #base_bench_state{}) ->
    {noreply, State}.

terminate(_Reason, _State = #base_bench_state{}) ->
    ok.

code_change(_OldVsn, State = #base_bench_state{}, _Extra) ->
    {ok, State}.

%%%===================================================================
%%% Internal functions
%%%===================================================================

start_processes(N, Duration) ->
    lists:foldl(fun(_N, Acc) -> [start_process(Duration) | Acc] end, [], lists:seq(1, N)).

start_process(Duration) ->
    Id = gen_id(),
    _ = spawn(progressor, init, [#{ns => ?NS, id => Id, args => term_to_binary(Duration)}]),
    Id.
%%

stop_processes(IDs) ->
    lists:foldl(fun(Id, Acc) ->
        do_call(#{ns => ?NS, id => Id, args => <<>>}, Acc)
    end, {[], 0}, IDs).
%%

do_call(Req, {Evs, Errs}) ->
    try progressor:call(Req) of
        {ok, EventsCount} ->
            {[EventsCount | Evs], Errs};
        {error, _} ->
            {Evs, Errs + 1}
    catch
        _Ex:_Er ->
            {Evs, Errs + 1}
    end.

gen_id() ->
    base64:encode(crypto:strong_rand_bytes(8)).
benchmark/base_bench/src/base_bench_app.erl (new file, 18 lines)
@@ -0,0 +1,18 @@
%%%-------------------------------------------------------------------
%% @doc base_bench public API
%% @end
%%%-------------------------------------------------------------------

-module(base_bench_app).

-behaviour(application).

-export([start/2, stop/1]).

start(_StartType, _StartArgs) ->
    base_bench_sup:start_link().

stop(_State) ->
    ok.

%% internal functions
benchmark/base_bench/src/base_bench_processor.erl (new file, 45 lines)
@@ -0,0 +1,45 @@
-module(base_bench_processor).

-export([process/3]).

process({init, Args, _Process}, _Opts, _Ctx) ->
    Fin = erlang:system_time(second) + binary_to_term(Args),
    Result = #{
        metadata => #{finish => Fin},
        events => [event(1)],
        action => #{set_timer => erlang:system_time(second)}
    },
    {ok, Result};
%%
process({timeout, _Args, #{history := History, metadata := Meta} = _Process}, _Opts, _Ctx) ->
    %Random = rand:uniform(40),
    %timer:sleep(60 + Random),
    #{finish := FinishTime} = Meta,
    Action = case FinishTime > erlang:system_time(second) of
        true -> #{set_timer => erlang:system_time(second)};
        false -> unset_timer
    end,
    %Action = #{set_timer => erlang:system_time(second)},
    NextId = erlang:length(History) + 1,
    Result = #{
        events => [event(NextId)],
        action => Action
    },
    {ok, Result};
%%
process({call, _Args, #{history := History} = _Process}, _Opts, _Ctx) ->
    Result = #{
        response => erlang:length(History),
        events => [],
        action => unset_timer
    },
    {ok, Result}.
%%

event(Id) ->
    #{
        event_id => Id,
        timestamp => erlang:system_time(second),
        metadata => #{<<"format_version">> => 1},
        payload => erlang:term_to_binary({bin, crypto:strong_rand_bytes(64)})
    }.
benchmark/base_bench/src/base_bench_sup.erl (new file, 35 lines)
@@ -0,0 +1,35 @@
%%%-------------------------------------------------------------------
%% @doc base_bench top level supervisor.
%% @end
%%%-------------------------------------------------------------------

-module(base_bench_sup).

-behaviour(supervisor).

-export([start_link/0]).

-export([init/1]).

-define(SERVER, ?MODULE).

start_link() ->
    supervisor:start_link({local, ?SERVER}, ?MODULE, []).

%% sup_flags() = #{strategy => strategy(), % optional
%%                 intensity => non_neg_integer(), % optional
%%                 period => pos_integer()} % optional
%% child_spec() = #{id => child_id(), % mandatory
%%                  start => mfargs(), % mandatory
%%                  restart => restart(), % optional
%%                  shutdown => shutdown(), % optional
%%                  type => worker(), % optional
%%                  modules => modules()} % optional
init([]) ->
    SupFlags = #{strategy => one_for_all,
                 intensity => 0,
                 period => 1},
    ChildSpecs = [],
    {ok, {SupFlags, ChildSpecs}}.

%% internal functions
config/sys.config (new file, 99 lines)
@@ -0,0 +1,99 @@
[
    {progressor, [
        {defaults, #{
            storage => #{
                %% required
                client => prg_pg_backend2,
                %% backend specific
                options => #{
                    %% see section 'epg_connector' 'pools'
                    pool => default_pool,
                    scan_pool => default_scan_pool,
                    front_pool => default_front_pool
                }
            },
            %% optional
            retry_policy => #{
                initial_timeout => 3, %% seconds
                backoff_coefficient => 1.2,
                max_timeout => 180, %% seconds
                max_attempts => 2,
                non_retryable_errors => [
                    some_reason,
                    any_term,
                    <<"Error message">>,
                    {temporary, unavailable}
                ]
            },
            %% optional, default process_step_timeout div 2
            task_scan_timeout => 10, %% seconds
            %% optional, default 100
            worker_pool_size => 100,
            %% optional, default 60 seconds
            process_step_timeout => 30 %% seconds
        }},

        {namespaces, #{
            'default/default' => #{
                %% required
                processor => #{
                    %% required
                    client => echo_processor,
                    %% client specific
                    options => #{}
                },
                notifier => #{
                    client => default_kafka_client,
                    options => #{
                        topic => <<"default_topic">>,
                        lifecycle_topic => <<"default_lifecycle_topic">>
                    }
                }
            }
        }}
    ]},

    %%
    {epg_connector, [
        {databases, #{
            progressor_db => #{
                host => "postgres",
                port => 5432,
                database => "progressor_db",
                username => "progressor",
                password => "progressor"
            }
        }},
        {pools, #{
            default_pool => #{
                %% see databases keys
                database => progressor_db,
                size => 100
            },
            default_scan_pool => #{
                database => progressor_db,
                size => 2
            },
            default_front_pool => #{
                database => progressor_db,
                size => 10
            }
        }}
    ]},

    {canal, [
        {url, "http://vault"},
        {engine, kvv2}
    ]},

    {brod, [
        {clients, [
            {default_kafka_client, [
                {endpoints, [{"kafka1", 9092}, {"kafka2", 9092}, {"kafka3", 9092}]},
                {auto_start_producers, true},
                {default_producer_config, []}
            ]}
        ]}
    ]}

].
config/vm.args (new file, 6 lines)
@@ -0,0 +1,6 @@
-sname progressor

-setcookie progressor_cookie

+K true
+A30
docker-compose.yml (new file, 91 lines)
@@ -0,0 +1,91 @@
services:

  testrunner:
    image: $DEV_IMAGE_TAG
    environment:
      WORK_DIR: $PWD
    build:
      dockerfile: Dockerfile.dev
      context: .
      args:
        OTP_VERSION: $OTP_VERSION
        THRIFT_VERSION: $THRIFT_VERSION
    volumes:
      - .:$PWD
    hostname: progressor
    depends_on:
      kafka1:
        condition: service_healthy
      kafka2:
        condition: service_healthy
      kafka3:
        condition: service_healthy
      postgres:
        condition: service_healthy
    working_dir: $PWD

  postgres:
    image: postgres:15-bookworm
    environment:
      POSTGRES_DB: "progressor_db"
      POSTGRES_USER: "progressor"
      POSTGRES_PASSWORD: "progressor"
      PGDATA: "/tmp/postgresql/data/pgdata"
    volumes:
      - progressor-data:/tmp/postgresql/data
    ports:
      - "5432:5432"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U progressor -d progressor_db"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 10s
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '2'
          memory: 4G

  zookeeper:
    image: docker.io/confluentinc/cp-zookeeper:${CONFLUENT_PLATFORM_VERSION}
    healthcheck:
      test: echo ruok | nc 127.0.0.1 2181 || exit -1
      interval: 5s
      timeout: 240s #🍎
      retries: 50
    environment:
      KAFKA_OPTS: "-Dzookeeper.4lw.commands.whitelist=ruok"
      ZOOKEEPER_CLIENT_PORT: 2181

  kafka1: &kafka-broker
    image: docker.io/confluentinc/cp-kafka:${CONFLUENT_PLATFORM_VERSION}
    depends_on:
      - zookeeper
    healthcheck:
      test: ["CMD", "kafka-topics", "--list", "--zookeeper", "zookeeper:2181"]
      interval: 5s
      timeout: 10s
      retries: 5
    environment:
      KAFKA_BROKER_ID: 1
      KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181'
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka1:9092

  kafka2:
    <<: *kafka-broker
    environment:
      KAFKA_BROKER_ID: 2
      KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181'
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka2:9092

  kafka3:
    <<: *kafka-broker
    environment:
      KAFKA_BROKER_ID: 3
      KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181'
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka3:9092

volumes:
  progressor-data:
include/progressor.hrl (new file, 152 lines)
@@ -0,0 +1,152 @@

%%%
%%% Base entities
%%%
-type process() :: #{
    process_id := id(),
    status := process_status(),
    detail => binary(),
    aux_state => binary(),
    metadata => map(),
    history => [event()],
    corrupted_by => task_id()
}.

-type task() :: #{
    task_id => task_id(),
    process_id := id(),
    task_type := task_type(),
    status := task_status(),
    scheduled_time := timestamp_sec(),
    running_time => timestamp_sec(),
    finished_time => timestamp_sec(),
    args => binary(),
    metadata => map(),
    idempotency_key => binary(),
    response => binary(),
    blocked_task => task_id(),
    last_retry_interval => non_neg_integer(),
    attempts_count => non_neg_integer(),
    context => binary()
}.

-type event() :: #{
    process_id := id(),
    task_id := task_id(),
    event_id := event_id(),
    timestamp := timestamp_sec(),
    metadata => #{format => pos_integer()},
    payload := binary()
}.

%%%
%%% Config options
%%%
-type namespace_opts() :: #{
    namespace := id(),
    storage := storage_opts(),
    processor := processor_opts(),
    notifier => notifier_opts(),
    retry_policy => retry_policy(),
    worker_pool_size => pos_integer(),
    process_step_timeout => timeout_sec(),
    task_scan_timeout => timeout_sec(),
    last_timer_repair => boolean()
}.

-type notifier_opts() :: #{
    client => atom(),
    options => term()
}.

-type processor_opts() :: #{
    client := module(),
    options => term()
}.

-type storage_handler() :: prg_pg_backend.

-type storage_opts() :: #{
    client := storage_handler(),
    options => term()
}.

-type retry_policy() :: #{
    initial_timeout => timeout_sec(),
    backoff_coefficient => float(),
    max_timeout => timeout_sec(),
    max_attempts => pos_integer(),
    non_retryable_errors => [term()]
}.

%%%
%%% Other types
%%%
-type id() :: binary().
-type event_id() :: pos_integer().
-type task_id() :: pos_integer().

-type process_status() :: binary().
% <<"running">> | <<"error">>

-type task_status() :: binary().
% <<"waiting">> | <<"running">> | <<"blocked">> | <<"error">> | <<"finished">> | <<"cancelled">>

-type task_type() :: binary().
% <<"init">> | <<"call">> | <<"notify">> | <<"repair">> | <<"timeout">>

-type task_t() :: init | call | repair | notify | timeout | remove.
-type task_origin() :: {pid(), reference()}.
-type task_header() :: {task_t(), task_origin() | undefined}.

-type namespace_id() :: atom().

-type recipient() :: internal | external.

-type history_range() :: #{
    offset => non_neg_integer(),
    limit => pos_integer()
}.

-type process_result() ::
    {ok, #{
        events := [event()],
        action => action(),
        response => term(),
        aux_state => binary(),
        metadata => map()
    }}
    | {error, binary()}.

-type action() :: #{set_timer := timestamp_sec(), remove => true} | unset_timer.

-type task_result() :: #{
    task_id := task_id(),
    status := task_status(),
    finished_time => timestamp_sec(),
    response => binary()
}.

-type timestamp_ms() :: non_neg_integer().
-type timestamp_sec() :: non_neg_integer().
-type timeout_sec() :: non_neg_integer().

%%%
%%% Constants
%%%
-define(DEFAULT_STEP_TIMEOUT_SEC, 60).

-define(DEFAULT_RETRY_POLICY, #{
    initial_timeout => 5, %% seconds
    backoff_coefficient => 1.0,
    max_attempts => 3
}).

-define(DEFAULT_WORKER_POOL_SIZE, 10).

-define(EPOCH_DIFF, 62167219200).

-define(NEW_PROCESS(ID), #{
    process_id => ID,
    status => <<"running">>
}).
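The `process_result()` and `action()` types above pin down the processor contract. A minimal sketch of a conforming processor follows; the module name `echo_processor` comes from `config/sys.config`, the callback shape mirrors `base_bench_processor.erl`, and the assumption that the engine fills in `process_id`/`task_id` on stored events is taken from that example rather than stated anywhere in this commit:

```
%% Sketch only: a processor that records one event on init, schedules an
%% immediate timer, stops it on timeout, and answers calls with `pong`.
-module(echo_processor).
-export([process/3]).

process({init, _Args, _Process}, _Opts, _Ctx) ->
    {ok, #{
        events => [#{
            event_id => 1,
            timestamp => erlang:system_time(second),
            metadata => #{<<"format_version">> => 1},
            payload => term_to_binary(init_done)
        }],
        action => #{set_timer => erlang:system_time(second)}
    }};
process({timeout, _Args, _Process}, _Opts, _Ctx) ->
    %% no new events, cancel the timer
    {ok, #{events => [], action => unset_timer}};
process({call, _Args, _Process}, _Opts, _Ctx) ->
    {ok, #{events => [], response => pong}}.
```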
priv/schemas/postgres-schema.sql (new file, 53 lines)
@@ -0,0 +1,53 @@
CREATE TYPE process_status AS ENUM ('running', 'error');
CREATE TYPE task_status AS ENUM ('waiting', 'running', 'blocked', 'error', 'finished', 'cancelled');
CREATE TYPE task_type AS ENUM ('init', 'timeout', 'call', 'notify', 'repair', 'remove');

CREATE TABLE IF NOT EXISTS default_processes(
    "process_id" VARCHAR(80) PRIMARY KEY,
    "status" process_status NOT NULL,
    "detail" TEXT,
    "aux_state" BYTEA,
    "metadata" JSONB
);

CREATE TABLE IF NOT EXISTS default_tasks(
    "task_id" BIGSERIAL PRIMARY KEY,
    "process_id" VARCHAR(80) NOT NULL,
    "task_type" task_type NOT NULL,
    "status" task_status NOT NULL,
    "scheduled_time" TIMESTAMP WITH TIME ZONE NOT NULL,
    "running_time" TIMESTAMP WITH TIME ZONE,
    "finished_time" TIMESTAMP WITH TIME ZONE,
    "args" BYTEA,
    "metadata" JSONB,
    "idempotency_key" VARCHAR(80) UNIQUE,
    "response" BYTEA,
    "blocked_task" BIGINT REFERENCES default_tasks ("task_id"),
    "last_retry_interval" INTEGER NOT NULL,
    "attempts_count" SMALLINT NOT NULL,
    "context" BYTEA,
    FOREIGN KEY ("process_id") REFERENCES default_processes ("process_id")
);

ALTER TABLE default_processes ADD COLUMN IF NOT EXISTS "corrupted_by" BIGINT REFERENCES default_tasks("task_id");

CREATE TABLE IF NOT EXISTS "default_locks"(
    "process_id" VARCHAR(80) PRIMARY KEY,
    "task_id" BIGINT NOT NULL,
    FOREIGN KEY ("process_id") REFERENCES "default_processes" ("process_id"),
    FOREIGN KEY ("task_id") REFERENCES "default_tasks" ("task_id")
);

CREATE TABLE IF NOT EXISTS default_events(
    "process_id" VARCHAR(80) NOT NULL,
    "task_id" BIGINT NOT NULL,
    "event_id" SMALLINT NOT NULL,
    "timestamp" TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    "metadata" JSONB,
    "payload" BYTEA NOT NULL,
    PRIMARY KEY ("process_id", "event_id"),
    FOREIGN KEY ("process_id") REFERENCES default_processes ("process_id"),
    FOREIGN KEY ("task_id") REFERENCES default_tasks ("task_id")
);

CREATE INDEX IF NOT EXISTS "process_idx" on default_events USING HASH ("process_id");
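For illustration, a process history can be read straight out of `default_events` with epgsql (a transitive dependency pinned in `rebar.lock`); this is a hypothetical debugging snippet that bypasses the storage backend, with connection parameters copied from `config/sys.config`:

```
%% Hypothetical debugging snippet, not part of the progressor API.
{ok, C} = epgsql:connect(#{host => "postgres", port => 5432,
                           username => "progressor", password => "progressor",
                           database => "progressor_db"}),
{ok, _Cols, Rows} = epgsql:equery(C,
    "SELECT event_id, timestamp, payload FROM default_events"
    " WHERE process_id = $1 ORDER BY event_id", [<<"proc-1">>]),
ok = epgsql:close(C).
```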
priv/schemas/postgres-schema2.sql (new file, 76 lines)
@@ -0,0 +1,76 @@
CREATE TYPE process_status AS ENUM ('running', 'error');
CREATE TYPE task_status AS ENUM ('waiting', 'running', 'blocked', 'error', 'finished', 'cancelled');
CREATE TYPE task_type AS ENUM ('init', 'timeout', 'call', 'notify', 'repair', 'remove');

CREATE TABLE IF NOT EXISTS default_processes(
    "process_id" VARCHAR(80) PRIMARY KEY,
    "status" process_status NOT NULL,
    "detail" TEXT,
    "aux_state" BYTEA,
    "metadata" JSONB
);

CREATE TABLE IF NOT EXISTS default_tasks(
    "task_id" BIGSERIAL PRIMARY KEY,
    "process_id" VARCHAR(80) NOT NULL,
    "task_type" task_type NOT NULL,
    "status" task_status NOT NULL,
    "scheduled_time" TIMESTAMP WITH TIME ZONE NOT NULL,
    "running_time" TIMESTAMP WITH TIME ZONE,
    "finished_time" TIMESTAMP WITH TIME ZONE,
    "args" BYTEA,
    "metadata" JSONB,
    "idempotency_key" VARCHAR(80) UNIQUE,
    "response" BYTEA,
    "last_retry_interval" INTEGER NOT NULL,
    "attempts_count" SMALLINT NOT NULL,
    "context" BYTEA,
    FOREIGN KEY ("process_id") REFERENCES default_processes ("process_id")
);

ALTER TABLE default_processes ADD COLUMN IF NOT EXISTS "corrupted_by" BIGINT REFERENCES default_tasks("task_id");

CREATE TABLE IF NOT EXISTS default_schedule(
    "task_id" BIGINT PRIMARY KEY,
    "process_id" VARCHAR(80) NOT NULL,
    "task_type" task_type NOT NULL,
    "status" task_status NOT NULL,
    "scheduled_time" TIMESTAMP WITH TIME ZONE NOT NULL,
    "args" BYTEA,
    "metadata" JSONB,
    "last_retry_interval" INTEGER NOT NULL,
    "attempts_count" SMALLINT NOT NULL,
    "context" BYTEA,
    FOREIGN KEY ("process_id") REFERENCES default_processes ("process_id"),
    FOREIGN KEY ("task_id") REFERENCES "default_tasks" ("task_id")
);

CREATE TABLE IF NOT EXISTS default_running(
    "process_id" VARCHAR(80) PRIMARY KEY,
    "task_id" BIGINT NOT NULL,
    "task_type" task_type NOT NULL,
    "status" task_status NOT NULL,
    "scheduled_time" TIMESTAMP WITH TIME ZONE NOT NULL,
    "running_time" TIMESTAMP WITH TIME ZONE NOT NULL,
    "args" BYTEA,
    "metadata" JSONB,
    "last_retry_interval" INTEGER NOT NULL,
    "attempts_count" SMALLINT NOT NULL,
    "context" BYTEA,
    FOREIGN KEY ("process_id") REFERENCES default_processes ("process_id"),
    FOREIGN KEY ("task_id") REFERENCES "default_tasks" ("task_id")
);

CREATE TABLE IF NOT EXISTS default_events(
    "process_id" VARCHAR(80) NOT NULL,
    "task_id" BIGINT NOT NULL,
    "event_id" SMALLINT NOT NULL,
    "timestamp" TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    "metadata" JSONB,
    "payload" BYTEA NOT NULL,
    PRIMARY KEY ("process_id", "event_id"),
    FOREIGN KEY ("process_id") REFERENCES default_processes ("process_id"),
    FOREIGN KEY ("task_id") REFERENCES default_tasks ("task_id")
);

CREATE INDEX IF NOT EXISTS "process_idx" on default_events USING HASH ("process_id");
rebar.config (new file, 25 lines)
@@ -0,0 +1,25 @@
{erl_opts, [debug_info]}.
{deps, [
    {brod, "3.16.2"},
    {prometheus, "4.11.0"},
    {thrift, {git, "https://github.com/valitydev/thrift_erlang.git", {branch, "master"}}},
    {mg_proto, {git, "https://github.com/valitydev/machinegun-proto.git", {branch, "master"}}},
    {epg_connector, {git, "https://github.com/valitydev/epg_connector.git", {branch, "master"}}}
]}.

{shell, [
    {config, "config/sys.config"},
    {apps, [brod, progressor]}
]}.

{ct_opts, [
    {ct_hooks, [prg_ct_hook]}
]}.

{profiles, [
    {test, [
        {deps, [
            {meck, "0.9.2"}
        ]}
    ]}
]}.
63
rebar.lock
Normal file
@ -0,0 +1,63 @@
{"1.2.0",
[{<<"brod">>,{pkg,<<"brod">>,<<"3.16.2">>},0},
 {<<"canal">>,
  {git,"https://github.com/valitydev/canal",
       {ref,"621d3821cd0a6036fee75d8e3b2d17167f3268e4"}},
  1},
 {<<"crc32cer">>,{pkg,<<"crc32cer">>,<<"0.1.8">>},2},
 {<<"epg_connector">>,
  {git,"https://github.com/valitydev/epg_connector.git",
       {ref,"c4e447b434d83d2d8615453fb5a008a7f493a5c6"}},
  0},
 {<<"epgsql">>,
  {git,"https://github.com/epgsql/epgsql.git",
       {ref,"7ba52768cf0ea7d084df24d4275a88eef4db13c2"}},
  1},
 {<<"epgsql_pool">>,
  {git,"https://github.com/wgnet/epgsql_pool",
       {ref,"f5e492f73752950aab932a1662536e22fc00c717"}},
  1},
 {<<"herd">>,
  {git,"https://github.com/wgnet/herd.git",
       {ref,"934847589dcf5a6d2b02a1f546ffe91c04066f17"}},
  1},
 {<<"jsone">>,{pkg,<<"jsone">>,<<"1.8.0">>},2},
 {<<"jsx">>,{pkg,<<"jsx">>,<<"3.1.0">>},1},
 {<<"kafka_protocol">>,{pkg,<<"kafka_protocol">>,<<"4.0.3">>},1},
 {<<"mg_proto">>,
  {git,"https://github.com/valitydev/machinegun-proto.git",
       {ref,"3decc8f8b13c9cd1701deab47781aacddd7dbc92"}},
  0},
 {<<"pooler">>,{pkg,<<"pooler">>,<<"1.5.3">>},2},
 {<<"prometheus">>,{pkg,<<"prometheus">>,<<"4.11.0">>},0},
 {<<"quantile_estimator">>,{pkg,<<"quantile_estimator">>,<<"0.2.1">>},1},
 {<<"snappyer">>,{pkg,<<"snappyer">>,<<"1.2.8">>},1},
 {<<"supervisor3">>,{pkg,<<"supervisor3">>,<<"1.1.11">>},1},
 {<<"thrift">>,
  {git,"https://github.com/valitydev/thrift_erlang.git",
       {ref,"3a60e5dc5bbd709495024f26e100b041c3547fd9"}},
  0}]}.
[
{pkg_hash,[
 {<<"brod">>, <<"6E33E9F781F4631CD33936D88C354E1E53F7FD299B1A1941448BF72EF7FAD314">>},
 {<<"crc32cer">>, <<"C6C2275C5FB60A95F4935D414F30B50EE9CFED494081C9B36EBB02EDFC2F48DB">>},
 {<<"jsone">>, <<"347FF1FA700E182E1F9C5012FA6D737B12C854313B9AE6954CA75D3987D6C06D">>},
 {<<"jsx">>, <<"D12516BAA0BB23A59BB35DCCAF02A1BD08243FCBB9EFE24F2D9D056CCFF71268">>},
 {<<"kafka_protocol">>, <<"E3C21CBA4B9CD278460EFA2B9AC6A5A9BB715FCFDA25945538E45B813F68F4B9">>},
 {<<"pooler">>, <<"898CD1FA301FC42D4A8ED598CE139B71CA85B54C16AB161152B5CC5FBDCFA1A8">>},
 {<<"prometheus">>, <<"B95F8DE8530F541BD95951E18E355A840003672E5EDA4788C5FA6183406BA29A">>},
 {<<"quantile_estimator">>, <<"EF50A361F11B5F26B5F16D0696E46A9E4661756492C981F7B2229EF42FF1CD15">>},
 {<<"snappyer">>, <<"201CE9067A33C71A6A5087C0C3A49A010B17112D461E6DF696C722DCB6D0934A">>},
 {<<"supervisor3">>, <<"D81CDEC31D102FDE407423E1D05B569572850DEEBED86B951D5233C387CBA80B">>}]},
{pkg_hash_ext,[
 {<<"brod">>, <<"34433CBB24892F4576FA522F7BFA1D59F823C1D33B451F47E4649BD7AD12A772">>},
 {<<"crc32cer">>, <<"251499085482920DEB6C9B7AADABF9FB4C432F96ADD97AB42AEE4501E5B6F591">>},
 {<<"jsone">>, <<"08560B78624A12E0B5E7EC0271EC8CA38EF51F63D84D84843473E14D9B12618C">>},
 {<<"jsx">>, <<"0C5CC8FDC11B53CC25CF65AC6705AD39E54ECC56D1C22E4ADB8F5A53FB9427F3">>},
 {<<"kafka_protocol">>, <<"528E14A571B02B9D055DAB1160A2ACA8474F2EC627A115EFB828AAE2BCE6E5AA">>},
 {<<"pooler">>, <<"058D85C5081289B90E97E4DDDBC3BB5A3B4A19A728AB3BC88C689EFCC36A07C7">>},
 {<<"prometheus">>, <<"719862351AABF4DF7079B05DC085D2BBCBE3AC0AC3009E956671B1D5AB88247D">>},
 {<<"quantile_estimator">>, <<"282A8A323CA2A845C9E6F787D166348F776C1D4A41EDE63046D72D422E3DA946">>},
 {<<"snappyer">>, <<"35518E79A28548B56D8FD6AEE2F565F12F51C2D3D053F9CFA817C83BE88C4F3D">>},
 {<<"supervisor3">>, <<"E6C2DEDBCABCBA24995A218ACA12DB5E208B80D3252692B22EF0F1A266104B50">>}]}
].
56
src/prg_namespace_sup.erl
Normal file
@ -0,0 +1,56 @@
-module(prg_namespace_sup).

-include("progressor.hrl").

-behaviour(supervisor).

%% API
-export([start_link/1]).

%% Supervisor callbacks
-export([init/1]).

%%%===================================================================
%%% API functions
%%%===================================================================

-spec(start_link({namespace_id(), namespace_opts()}) -> {ok, Pid :: pid()} | ignore | {error, Reason :: term()}).
start_link({NsId, #{storage := StorageOpts}} = NS) ->
    ok = prg_storage:db_init(StorageOpts, NsId),
    RegName = prg_utils:registered_name(NsId, "_namespace_sup"),
    supervisor:start_link({local, RegName}, ?MODULE, NS).

%%%===================================================================
%%% Supervisor callbacks
%%%===================================================================

init({NsId, _NsOpts} = NS) ->
    MaxRestarts = 1000,
    MaxSecondsBetweenRestarts = 3600,
    SupFlags = #{strategy => one_for_all,
        intensity => MaxRestarts,
        period => MaxSecondsBetweenRestarts},
    SchedulerSpec = #{
        id => prg_utils:registered_name(NsId, "_scheduler"),
        start => {prg_scheduler, start_link, [NS]}
    },
    ScannerSpec = #{
        id => prg_utils:registered_name(NsId, "_scanner"),
        start => {prg_scanner, start_link, [NS]}
    },
    WorkerSupSpec = #{
        id => prg_utils:registered_name(NsId, "_worker_sup"),
        start => {prg_worker_sup, start_link, [NS]},
        type => supervisor
    },
    Specs = [
        WorkerSupSpec,
        SchedulerSpec,
        ScannerSpec
    ],

    {ok, {SupFlags, Specs}}.

%%%===================================================================
%%% Internal functions
%%%===================================================================
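Wiring a namespace into this tree is a matter of passing {NsId, NsOpts}. A minimal sketch, using placeholder client module names (my_pg_backend, my_processor are hypothetical; the remaining options are filled in by prg_utils:make_ns_opts/2):

    NsOpts = prg_utils:make_ns_opts(default, #{
        %% my_pg_backend and my_processor are placeholder module names
        storage => #{client => my_pg_backend, options => #{}},
        processor => #{client => my_processor, options => #{}}
    }),
    {ok, _Pid} = prg_namespace_sup:start_link({default, NsOpts}).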
182
src/prg_notifier.erl
Normal file
@ -0,0 +1,182 @@
-module(prg_notifier).

-include("progressor.hrl").
-include_lib("mg_proto/include/mg_proto_lifecycle_sink_thrift.hrl").
-include_lib("mg_proto/include/mg_proto_event_sink_thrift.hrl").

-export([event_sink/3]).
-export([lifecycle_sink/3]).

%% internals
-export([serialize_content/1]).

-spec event_sink(namespace_opts(), id(), [event()]) -> ok | {error, _Reason} | no_return().
event_sink(_NsOpts, _ID, []) ->
    ok;
event_sink(
    #{
        namespace := NS,
        notifier := #{client := Client, options := #{topic := Topic}}
    },
    ID,
    Events
) ->
    Batch = encode(fun serialize_eventsink/3, NS, ID, Events),
    ok = produce(Client, Topic, event_key(NS, ID), Batch),
    ok;
event_sink(_NsOpts, _ID, _Events) ->
    ok.

-spec lifecycle_sink(namespace_opts(), task_t() | {error, _Reason}, id()) -> ok | {error, _Reason} | no_return().
lifecycle_sink(
    #{
        namespace := NS,
        notifier := #{client := Client, options := #{lifecycle_topic := Topic}}
    },
    TaskType,
    ID
) when
    TaskType =:= init;
    TaskType =:= repair;
    TaskType =:= remove;
    erlang:is_tuple(TaskType)
->
    Batch = encode(fun serialize_lifecycle/3, NS, ID, [lifecycle_event(TaskType)]),
    ok = produce(Client, Topic, event_key(NS, ID), Batch),
    ok;
lifecycle_sink(_NsOpts, _TaskType, _ID) ->
    ok.

%% Internal functions

encode(Encoder, NS, ID, Events) ->
    [
        #{
            key => event_key(NS, ID),
            value => Encoder(NS, ID, Event)
        } || Event <- Events
    ].

produce(Client, Topic, PartitionKey, Batch) ->
    case brod:get_partitions_count(Client, Topic) of
        {ok, PartitionsCount} ->
            Partition = partition(PartitionsCount, PartitionKey),
            case brod:produce_sync_offset(Client, Topic, Partition, PartitionKey, Batch) of
                {ok, _Offset} ->
                    ok;
                {error, _Reason} = Error ->
                    Error
            end;
        {error, _Reason} = Error ->
            Error
    end.

partition(PartitionsCount, Key) ->
    erlang:phash2(Key) rem PartitionsCount.

event_key(NS, ID) ->
    <<NS/binary, " ", ID/binary>>.

%% eventsink serialization

serialize_eventsink(SourceNS, SourceID, Event) ->
    Codec = thrift_strict_binary_codec:new(),
    #{
        event_id := EventID,
        timestamp := Timestamp,
        payload := Payload
    } = Event,
    Content = erlang:binary_to_term(Payload),
    Metadata = maps:get(metadata, Event, #{}),
    Data =
        {event, #mg_evsink_MachineEvent{
            source_ns = SourceNS,
            source_id = SourceID,
            event_id = EventID,
            created_at = serialize_timestamp(Timestamp),
            format_version = maps:get(format_version, Metadata, undefined),
            data = Content
        }},
    Type = {struct, union, {mg_proto_event_sink_thrift, 'SinkEvent'}},
    case thrift_strict_binary_codec:write(Codec, Type, Data) of
        {ok, NewCodec} ->
            thrift_strict_binary_codec:close(NewCodec);
        {error, Reason} ->
            erlang:error({?MODULE, Reason})
    end.

serialize_content(null) ->
    {nl, #mg_msgpack_Nil{}};
serialize_content(Boolean) when is_boolean(Boolean) ->
    {b, Boolean};
serialize_content(Integer) when is_integer(Integer) ->
    {i, Integer};
serialize_content(Float) when is_float(Float) ->
    {flt, Float};
serialize_content({string, String}) ->
    {str, unicode:characters_to_binary(String, unicode)};
serialize_content(Binary) when is_binary(Binary) ->
    {bin, Binary};
serialize_content(Object) when is_map(Object) ->
    {obj,
        maps:fold(
            fun(K, V, Acc) -> maps:put(serialize_content(K), serialize_content(V), Acc) end,
            #{},
            Object
        )};
serialize_content(Array) when is_list(Array) ->
    {arr, lists:map(fun serialize_content/1, Array)};
serialize_content(Arg) ->
    erlang:error(badarg, [Arg]).

serialize_timestamp(TimestampSec) ->
    Str = calendar:system_time_to_rfc3339(TimestampSec, [{unit, second}, {offset, "Z"}]),
    erlang:list_to_binary(Str).

%% lifecycle serialization

lifecycle_event(init) ->
    {machine_lifecycle_created, #{occurred_at => erlang:system_time(second)}};
lifecycle_event(repair) ->
    {machine_lifecycle_repaired, #{occurred_at => erlang:system_time(second)}};
lifecycle_event(remove) ->
    {machine_lifecycle_removed, #{occurred_at => erlang:system_time(second)}};
lifecycle_event({error, Reason}) ->
    {machine_lifecycle_failed, #{occurred_at => erlang:system_time(second), reason => Reason}}.

serialize_lifecycle(SourceNS, SourceID, Event) ->
    Codec = thrift_strict_binary_codec:new(),
    Data = serialize_lifecycle_event(SourceNS, SourceID, Event),
    Type = {struct, struct, {mg_proto_lifecycle_sink_thrift, 'LifecycleEvent'}},
    case thrift_strict_binary_codec:write(Codec, Type, Data) of
        {ok, NewCodec} ->
            thrift_strict_binary_codec:close(NewCodec);
        {error, Reason} ->
            erlang:error({?MODULE, Reason})
    end.

serialize_lifecycle_event(SourceNS, SourceID, {_, #{occurred_at := Timestamp}} = Event) ->
    #mg_lifesink_LifecycleEvent{
        machine_ns = SourceNS,
        machine_id = SourceID,
        created_at = serialize_timestamp(Timestamp),
        data = serialize_lifecycle_data(Event)
    }.

serialize_lifecycle_data({machine_lifecycle_created, _}) ->
    {machine, {created, #mg_lifesink_MachineLifecycleCreatedEvent{}}};
serialize_lifecycle_data({machine_lifecycle_failed, #{reason := Reason}}) ->
    {machine,
        {status_changed, #mg_lifesink_MachineLifecycleStatusChangedEvent{
            new_status =
                {failed, #mg_stateproc_MachineStatusFailed{
                    reason = Reason
                }}
        }}};
serialize_lifecycle_data({machine_lifecycle_repaired, _}) ->
    {machine,
        {status_changed, #mg_lifesink_MachineLifecycleStatusChangedEvent{
            new_status = {working, #mg_stateproc_MachineStatusWorking{}}
        }}};
serialize_lifecycle_data({machine_lifecycle_removed, _}) ->
    {machine, {removed, #mg_lifesink_MachineLifecycleRemovedEvent{}}}.
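produce/4 keys every message for a given process to one Kafka partition, so per-process ordering survives fan-out. A quick sketch of the arithmetic with an illustrative partition count:

    Key = event_key(<<"payments">>, <<"p1">>),   %% <<"payments p1">>
    Partition = erlang:phash2(Key) rem 16,       %% same value for every event of p1
    %% brod then publishes the whole batch to exactly this partition.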
176
src/prg_scanner.erl
Normal file
@ -0,0 +1,176 @@
-module(prg_scanner).

-behaviour(gen_server).

-export([start_link/1]).
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
    code_change/3]).

-record(prg_scanner_state, {ns_id, ns_opts, rescan_timeout, step_timeout}).

-define(CALLS_SCAN_KEY, progressor_calls_scanning_duration_ms).
-define(TIMERS_SCAN_KEY, progressor_timers_scanning_duration_ms).
-define(ZOMBIE_COLLECTION_KEY, progressor_zombie_collection_duration_ms).

-dialyzer({nowarn_function, search_timers/3}).
-dialyzer({nowarn_function, search_calls/3}).

%%%===================================================================
%%% Spawning and gen_server implementation
%%%===================================================================

start_link({NsId, _NsOpts} = NS) ->
    RegName = prg_utils:registered_name(NsId, "_scanner"),
    gen_server:start_link({local, RegName}, ?MODULE, NS, []).

init({NsId, #{task_scan_timeout := RescanTimeoutSec, process_step_timeout := StepTimeoutSec} = Opts}) ->
    RescanTimeoutMs = RescanTimeoutSec * 1000,
    StepTimeoutMs = StepTimeoutSec * 1000,
    State = #prg_scanner_state{
        ns_id = NsId,
        ns_opts = Opts,
        rescan_timeout = RescanTimeoutMs,
        step_timeout = StepTimeoutMs
    },
    _ = start_rescan_timers(RescanTimeoutMs),
    _ = start_rescan_calls((RescanTimeoutMs div 3) + 100),
    _ = start_zombie_collector(StepTimeoutMs),
    {ok, State}.

handle_call(_Request, _From, State) ->
    {reply, ok, State}.

handle_cast(_Request, State = #prg_scanner_state{}) ->
    {noreply, State}.

handle_info(
    {timeout, _TimerRef, rescan_timers},
    State = #prg_scanner_state{ns_id = NsId, ns_opts = NsOpts, rescan_timeout = RescanTimeout}
) ->
    case prg_scheduler:count_workers(NsId) of
        0 ->
            %% all workers are busy
            skip;
        N ->
            Calls = search_calls(N, NsId, NsOpts),
            Timers = search_timers(N - erlang:length(Calls), NsId, NsOpts),
            Tasks = Calls ++ Timers,
            lists:foreach(fun(#{task_type := Type} = Task) ->
                ok = prg_scheduler:push_task(NsId, header(Type), Task)
            end, Tasks)
    end,
    _ = start_rescan_timers(RescanTimeout),
    {noreply, State};
handle_info(
    {timeout, _TimerRef, rescan_calls},
    State = #prg_scanner_state{ns_id = NsId, ns_opts = NsOpts, rescan_timeout = RescanTimeout}
) ->
    case prg_scheduler:count_workers(NsId) of
        0 ->
            %% all workers are busy
            skip;
        N ->
            Calls = search_calls(N, NsId, NsOpts),
            lists:foreach(fun(#{task_type := Type} = Task) ->
                ok = prg_scheduler:push_task(NsId, header(Type), Task)
            end, Calls)
    end,
    _ = start_rescan_calls((RescanTimeout div 3) + 1),
    {noreply, State};
handle_info(
    {timeout, _TimerRef, collect_zombie},
    State = #prg_scanner_state{ns_id = NsId, ns_opts = NsOpts, step_timeout = StepTimeout}
) ->
    ok = collect_zombie(NsId, NsOpts),
    _ = start_zombie_collector(StepTimeout),
    {noreply, State};
handle_info(_Info, State = #prg_scanner_state{}) ->
    {noreply, State}.

terminate(_Reason, _State = #prg_scanner_state{}) ->
    ok.

code_change(_OldVsn, State = #prg_scanner_state{}, _Extra) ->
    {ok, State}.

%%%===================================================================
%%% Internal functions
%%%===================================================================

start_rescan_timers(RescanTimeoutMs) ->
    RandomDelta = rand:uniform(RescanTimeoutMs div 5),
    erlang:start_timer(RescanTimeoutMs + RandomDelta, self(), rescan_timers).

start_rescan_calls(RescanTimeoutMs) ->
    RandomDelta = rand:uniform(RescanTimeoutMs div 5),
    erlang:start_timer(RescanTimeoutMs + RandomDelta, self(), rescan_calls).

start_zombie_collector(RescanTimeoutMs) ->
    RandomDelta = rand:uniform(RescanTimeoutMs div 5),
    erlang:start_timer(RescanTimeoutMs + RandomDelta, self(), collect_zombie).

search_timers(
    FreeWorkersCount,
    NsId,
    #{
        storage := StorageOpts,
        process_step_timeout := TimeoutSec,
        task_scan_timeout := ScanTimeoutSec
    }
) ->
    Fun = fun() ->
        try
            prg_storage:search_timers(StorageOpts, NsId, TimeoutSec + ScanTimeoutSec, FreeWorkersCount) of
            Result when is_list(Result) ->
                Result;
            Unexpected ->
                logger:error("search timers error: ~p", [Unexpected]),
                []
        catch
            Class:Reason:Trace ->
                logger:error("search timers exception: ~p", [[Class, Reason, Trace]]),
                []
        end
    end,
    prg_utils:with_observe(Fun, ?TIMERS_SCAN_KEY, [erlang:atom_to_binary(NsId, utf8)]).

search_calls(
    FreeWorkersCount,
    NsId,
    #{storage := StorageOpts}
) ->
    Fun = fun() ->
        try prg_storage:search_calls(StorageOpts, NsId, FreeWorkersCount) of
            Result when is_list(Result) ->
                Result;
            Unexpected ->
                logger:error("search calls error: ~p", [Unexpected]),
                []
        catch
            Class:Reason:Trace ->
                logger:error("search calls exception: ~p", [[Class, Reason, Trace]]),
                []
        end
    end,
    prg_utils:with_observe(Fun, ?CALLS_SCAN_KEY, [erlang:atom_to_binary(NsId, utf8)]).

collect_zombie(
    NsId,
    #{
        storage := StorageOpts,
        process_step_timeout := TimeoutSec,
        task_scan_timeout := ScanTimeoutSec
    }
) ->
    Fun = fun() ->
        try prg_storage:collect_zombies(StorageOpts, NsId, TimeoutSec + ScanTimeoutSec)
        catch
            Class:Reason:Trace ->
                logger:error("zombie collection exception: ~p", [[Class, Reason, Trace]])
        end,
        ok
    end,
    prg_utils:with_observe(Fun, ?ZOMBIE_COLLECTION_KEY, [erlang:atom_to_binary(NsId, utf8)]).

header(Type) ->
    {erlang:binary_to_atom(Type), undefined}.
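Each rescan timer adds a random delta of up to a fifth of the base interval, so scanners for several namespaces started together drift apart instead of hitting storage simultaneously. A worked example with an illustrative task_scan_timeout of 10 seconds:

    RescanTimeoutMs = 10000,
    RandomDelta = rand:uniform(RescanTimeoutMs div 5),  %% 1..2000
    %% rescan_timers fires somewhere in (10s, 12s], re-randomized each round.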
194
src/prg_scheduler.erl
Normal file
@ -0,0 +1,194 @@
-module(prg_scheduler).

-include("progressor.hrl").

-behaviour(gen_server).

-export([start_link/1]).
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
    code_change/3]).

%% API
-export([push_task/3]).
-export([pop_task/2]).
-export([count_workers/1]).
-export([capture_worker/2]).
-export([return_worker/3]).
-export([release_worker/3]).
-export([continuation_task/3]).

-record(prg_scheduler_state, {ns_id, ns_opts, ready, free_workers, owners}).

%%%%%%%
%%% API
%%%%%%%

-spec push_task(namespace_id(), task_header(), task()) -> ok.
push_task(NsId, TaskHeader, Task) ->
    RegName = prg_utils:registered_name(NsId, "_scheduler"),
    gen_server:cast(RegName, {push_task, TaskHeader, Task}).

-spec pop_task(namespace_id(), pid()) -> {task_header(), task()} | not_found.
pop_task(NsId, Worker) ->
    RegName = prg_utils:registered_name(NsId, "_scheduler"),
    gen_server:call(RegName, {pop_task, Worker}, infinity).

-spec continuation_task(namespace_id(), pid(), task()) -> {task_header(), task()} | ok.
continuation_task(NsId, Worker, Task) ->
    RegName = prg_utils:registered_name(NsId, "_scheduler"),
    gen_server:call(RegName, {continuation_task, Worker, Task}).

-spec count_workers(namespace_id()) -> non_neg_integer().
count_workers(NsId) ->
    RegName = prg_utils:registered_name(NsId, "_scheduler"),
    gen_server:call(RegName, count_workers, infinity).

-spec capture_worker(namespace_id(), pid()) -> {ok, pid()} | {error, not_found | nested_capture}.
capture_worker(NsId, Owner) ->
    RegName = prg_utils:registered_name(NsId, "_scheduler"),
    gen_server:call(RegName, {capture_worker, Owner}, infinity).

-spec return_worker(namespace_id(), pid(), pid()) -> ok.
return_worker(NsId, Owner, Worker) ->
    RegName = prg_utils:registered_name(NsId, "_scheduler"),
    gen_server:cast(RegName, {return_worker, Owner, Worker}).

-spec release_worker(namespace_id(), pid(), pid()) -> ok.
release_worker(NsId, Owner, Pid) ->
    RegName = prg_utils:registered_name(NsId, "_scheduler"),
    gen_server:cast(RegName, {release_worker, Owner, Pid}).

%%%===================================================================
%%% Spawning and gen_server implementation
%%%===================================================================

start_link({NsId, _NsOpts} = NS) ->
    RegName = prg_utils:registered_name(NsId, "_scheduler"),
    gen_server:start_link({local, RegName}, ?MODULE, NS, []).

init({NsId, Opts}) ->
    _ = start_workers(NsId, Opts),
    State = #prg_scheduler_state{
        ns_id = NsId,
        ns_opts = Opts,
        ready = queue:new(),
        free_workers = queue:new(),
        owners = #{}
    },
    {ok, State}.

handle_call({pop_task, Worker}, _From, State) ->
    case queue:out(State#prg_scheduler_state.ready) of
        {{value, TaskData}, NewReady} ->
            {reply, TaskData, State#prg_scheduler_state{ready = NewReady}};
        {empty, _} ->
            Workers = State#prg_scheduler_state.free_workers,
            {reply, not_found, State#prg_scheduler_state{free_workers = queue:in(Worker, Workers)}}
    end;
handle_call(count_workers, _From, #prg_scheduler_state{free_workers = Workers} = State) ->
    {reply, queue:len(Workers), State};
handle_call(
    {capture_worker, Owner}, _From,
    #prg_scheduler_state{owners = Owners} = State
) when erlang:is_map_key(Owner, Owners) ->
    {reply, {error, nested_capture}, State};
handle_call(
    {capture_worker, Owner}, _From,
    #prg_scheduler_state{owners = Owners} = State
) ->
    case queue:out(State#prg_scheduler_state.free_workers) of
        {{value, Worker}, NewWorkers} ->
            MRef = erlang:monitor(process, Owner),
            NewOwners = Owners#{Owner => {MRef, Worker}},
            {reply, {ok, Worker}, State#prg_scheduler_state{free_workers = NewWorkers, owners = NewOwners}};
        {empty, _} ->
            {reply, {error, not_found}, State}
    end;
handle_call({continuation_task, _Worker, Task}, _From, State) ->
    case queue:out(State#prg_scheduler_state.ready) of
        {{value, TaskData}, NewReady} ->
            {reply, TaskData, State#prg_scheduler_state{
                ready = queue:in({header(), Task}, NewReady)
            }};
        {empty, _} ->
            {reply, ok, State}
    end;
handle_call(_Request, _From, State = #prg_scheduler_state{}) ->
    {reply, ok, State}.

handle_cast({push_task, TaskHeader, Task}, State) ->
    NewState = do_push_task(TaskHeader, Task, State),
    {noreply, NewState};
handle_cast(
    {return_worker, Owner, Worker},
    State = #prg_scheduler_state{free_workers = Workers, owners = Owners}
) ->
    case maps:get(Owner, Owners, undefined) of
        undefined ->
            skip;
        {Ref, Worker} ->
            _ = erlang:demonitor(Ref)
    end,
    NewWorkers = queue:in(Worker, Workers),
    NewOwners = maps:without([Owner], Owners),
    {noreply, State#prg_scheduler_state{free_workers = NewWorkers, owners = NewOwners}};
handle_cast(
    {release_worker, Owner, Worker},
    State = #prg_scheduler_state{owners = Owners}
) ->
    NewState = case maps:get(Owner, Owners, undefined) of
        undefined ->
            State;
        {Ref, Worker} ->
            _ = erlang:demonitor(Ref),
            State#prg_scheduler_state{owners = maps:without([Owner], Owners)}
    end,
    {noreply, NewState};
handle_cast(_Request, State = #prg_scheduler_state{}) ->
    {noreply, State}.

handle_info(
    {'DOWN', _Ref, process, Pid, _Info},
    State = #prg_scheduler_state{owners = Owners}
) when erlang:is_map_key(Pid, Owners) ->
    {noreply, State#prg_scheduler_state{owners = maps:without([Pid], Owners)}};
handle_info(_Info, State = #prg_scheduler_state{}) ->
    {noreply, State}.

terminate(_Reason, _State = #prg_scheduler_state{}) ->
    ok.

code_change(_OldVsn, State = #prg_scheduler_state{}, _Extra) ->
    {ok, State}.

%%%===================================================================
%%% Internal functions
%%%===================================================================

start_workers(NsId, NsOpts) ->
    WorkerPoolSize = maps:get(worker_pool_size, NsOpts, ?DEFAULT_WORKER_POOL_SIZE),
    WorkerSup = prg_utils:registered_name(NsId, "_worker_sup"),
    lists:foreach(fun(N) ->
        supervisor:start_child(WorkerSup, [N])
    end, lists:seq(1, WorkerPoolSize)).

do_push_task(TaskHeader, Task, State) ->
    FreeWorkers = State#prg_scheduler_state.free_workers,
    case queue:out(FreeWorkers) of
        {{value, Worker}, NewQueue} ->
            ok = prg_worker:process_task(Worker, TaskHeader, Task),
            State#prg_scheduler_state{
                free_workers = NewQueue
            };
        {empty, _} ->
            OldReady = State#prg_scheduler_state.ready,
            State#prg_scheduler_state{
                ready = queue:in({TaskHeader, Task}, OldReady)
            }
    end.

header() ->
    header(<<"timeout">>).

header(Type) ->
    {erlang:binary_to_atom(Type), undefined}.
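From a caller's perspective the capture/return protocol looks as follows; this is a sketch with an illustrative namespace id, and note that a second capture_worker/2 from the same owner is refused as nested_capture:

    case prg_scheduler:capture_worker(default, self()) of
        {ok, Worker} ->
            %% the worker is exclusively ours; hand it work, then,
            %% once the interaction is over, give it back:
            ok = prg_scheduler:return_worker(default, self(), Worker);
        {error, not_found} ->
            %% no free workers right now
            backoff_and_retry;
        {error, nested_capture} ->
            %% this owner already holds a worker
            error(nested_capture)
    end.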
150
src/prg_storage.erl
Normal file
@ -0,0 +1,150 @@
-module(prg_storage).

-include("progressor.hrl").

%% api handler functions
-export([get_task_result/3]).
-export([get_process_status/3]).
-export([prepare_init/4]).
-export([prepare_call/4]).
-export([prepare_repair/4]).

%% scan functions
-export([search_timers/4]).
-export([search_calls/3]).
-export([collect_zombies/3]).

%% worker functions
-export([complete_and_continue/6]).
-export([complete_and_suspend/5]).
-export([complete_and_unlock/5]).
-export([complete_and_error/4]).
-export([remove_process/3]).

%% shared functions
-export([get_task/3]).
-export([get_task/4]).
-export([get_process/3]).
-export([get_process/4]).
-export([get_process/5]).

%% Init operations
-export([db_init/2]).

%-ifdef(TEST).
-export([cleanup/2]).
%-endif.

%%%%%%%%%%%%%%%%%%%%%%%%
%% API handler functions
%%%%%%%%%%%%%%%%%%%%%%%%

-spec get_task_result(storage_opts(), namespace_id(), {task_id | idempotency_key, binary()}) ->
    {ok, term()} | {error, _Reason}.
get_task_result(#{client := Handler, options := HandlerOpts}, NsId, KeyOrId) ->
    Handler:get_task_result(HandlerOpts, NsId, KeyOrId).

-spec get_process_status(storage_opts(), namespace_id(), id()) -> {ok, _Result} | {error, _Reason}.
get_process_status(#{client := Handler, options := HandlerOpts}, NsId, Id) ->
    Handler:get_process_status(HandlerOpts, NsId, Id).

-spec prepare_init(storage_opts(), namespace_id(), id(), task()) ->
    {ok, {postpone, task_id()} | {continue, task_id()}} | {error, _Reason}.
prepare_init(#{client := Handler, options := HandlerOpts}, NsId, ProcessId, InitTask) ->
    Handler:prepare_init(HandlerOpts, NsId, ?NEW_PROCESS(ProcessId), InitTask).

-spec prepare_call(storage_opts(), namespace_id(), id(), task()) ->
    {ok, {postpone, task_id()} | {continue, task_id()}} | {error, _Error}.
prepare_call(#{client := Handler, options := HandlerOpts}, NsId, ProcessId, Task) ->
    Handler:prepare_call(HandlerOpts, NsId, ProcessId, Task).

-spec prepare_repair(storage_opts(), namespace_id(), id(), task()) ->
    {ok, {postpone, task_id()} | {continue, task_id()}} | {error, _Reason}.
prepare_repair(#{client := Handler, options := HandlerOpts}, NsId, ProcessId, RepairTask) ->
    Handler:prepare_repair(HandlerOpts, NsId, ProcessId, RepairTask).

%%%%%%%%%%%%%%%%%
%% Scan functions
%%%%%%%%%%%%%%%%%

-spec search_timers(storage_opts(), namespace_id(), timeout_sec(), pos_integer()) ->
    [task()] | {error, _Reason}.
search_timers(#{client := Handler, options := HandlerOpts}, NsId, Timeout, Limit) ->
    Handler:search_timers(HandlerOpts, NsId, Timeout, Limit).

-spec search_calls(storage_opts(), namespace_id(), pos_integer()) -> [task()] | {error, _Reason}.
search_calls(#{client := Handler, options := HandlerOpts}, NsId, Limit) ->
    Handler:search_calls(HandlerOpts, NsId, Limit).

-spec collect_zombies(storage_opts(), namespace_id(), timeout_sec()) -> ok.
collect_zombies(#{client := Handler, options := HandlerOpts}, NsId, Timeout) ->
    Handler:collect_zombies(HandlerOpts, NsId, Timeout).

%%%%%%%%%%%%%%%%%%%
%% Worker functions
%%%%%%%%%%%%%%%%%%%

-spec complete_and_continue(storage_opts(), namespace_id(), task_result(), process(), [event()], task()) ->
    {ok, [task()]}.
complete_and_continue(#{client := Handler, options := HandlerOpts}, NsId, TaskResult, Process, Events, NextTask) ->
    Handler:complete_and_continue(HandlerOpts, NsId, TaskResult, Process, Events, NextTask).

-spec complete_and_suspend(storage_opts(), namespace_id(), task_result(), process(), [event()]) ->
    {ok, [task()]}.
complete_and_suspend(#{client := Handler, options := HandlerOpts}, NsId, TaskResult, Process, Events) ->
    Handler:complete_and_suspend(HandlerOpts, NsId, TaskResult, Process, Events).

-spec complete_and_error(storage_opts(), namespace_id(), task_result(), process()) -> ok.
complete_and_error(#{client := Handler, options := HandlerOpts}, NsId, TaskResult, Process) ->
    Handler:complete_and_error(HandlerOpts, NsId, TaskResult, Process).

-spec complete_and_unlock(storage_opts(), namespace_id(), task_result(), process(), [event()]) ->
    {ok, [task()]}.
complete_and_unlock(#{client := Handler, options := HandlerOpts}, NsId, TaskResult, Process, Events) ->
    Handler:complete_and_unlock(HandlerOpts, NsId, TaskResult, Process, Events).

-spec remove_process(storage_opts(), namespace_id(), id()) -> ok | no_return().
remove_process(#{client := Handler, options := HandlerOpts}, NsId, ProcessId) ->
    Handler:remove_process(HandlerOpts, NsId, ProcessId).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Shared functions (recipient required)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

-spec get_task(storage_opts(), namespace_id(), task_id()) -> {ok, task()} | {error, _Reason}.
get_task(StorageOpts, NsId, TaskId) ->
    get_task(internal, StorageOpts, NsId, TaskId).

-spec get_task(recipient(), storage_opts(), namespace_id(), task_id()) -> {ok, task()} | {error, _Reason}.
get_task(Recipient, #{client := Handler, options := HandlerOpts}, NsId, TaskId) ->
    Handler:get_task(Recipient, HandlerOpts, NsId, TaskId).

-spec get_process(storage_opts(), namespace_id(), id()) -> {ok, process()} | {error, _Reason}.
get_process(StorageOpts, NsId, ProcessId) ->
    get_process(internal, StorageOpts, NsId, ProcessId, #{}).

-spec get_process(
    storage_opts() | recipient(),
    namespace_id() | storage_opts(),
    id() | namespace_id(),
    history_range() | id()
) -> {ok, process()} | {error, _Reason}.
get_process(StorageOpts, NsId, ProcessId, HistoryRange) when is_map(StorageOpts) ->
    get_process(internal, StorageOpts, NsId, ProcessId, HistoryRange);
get_process(Recipient, StorageOpts, NsId, ProcessId) when is_atom(Recipient) ->
    get_process(Recipient, StorageOpts, NsId, ProcessId, #{}).

-spec get_process(recipient(), storage_opts(), namespace_id(), id(), history_range()) ->
    {ok, process()} | {error, _Reason}.
get_process(Recipient, #{client := Handler, options := HandlerOpts}, NsId, ProcessId, HistoryRange) ->
    Handler:get_process(Recipient, HandlerOpts, NsId, ProcessId, HistoryRange).

%%%

-spec db_init(storage_opts(), namespace_id()) -> ok.
db_init(#{client := Handler, options := HandlerOpts}, NsId) ->
    Handler:db_init(HandlerOpts, NsId).

-spec cleanup(_, _) -> _.
cleanup(#{client := Handler, options := HandlerOpts}, NsId) ->
    Handler:cleanup(HandlerOpts, NsId).
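prg_storage is a pure dispatch layer: every function unwraps #{client := Handler, options := HandlerOpts} and delegates, so a storage backend is any module exporting the same names with HandlerOpts prepended. A minimal sketch of one such callback, with a placeholder module name:

    -module(my_storage_backend).
    -export([get_process_status/3]).

    %% HandlerOpts is the `options` map from storage_opts().
    get_process_status(_HandlerOpts, _NsId, _ProcessId) ->
        {ok, <<"running">>}.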
60
src/prg_utils.erl
Normal file
@ -0,0 +1,60 @@
-module(prg_utils).

-include("progressor.hrl").

%% API
-export([registered_name/2]).
-export([pipe/2]).
-export([format/1]).
-export([make_ns_opts/2]).
-export([with_observe/3]).
-export([with_observe/4]).

-spec registered_name(atom(), string()) -> atom().
registered_name(BaseAtom, PostfixStr) ->
    erlang:list_to_atom(erlang:atom_to_list(BaseAtom) ++ PostfixStr).

-spec pipe([function()], term()) -> term().
pipe([], Result) -> Result;
pipe(_Funs, {error, _} = Error) -> Error;
pipe(_Funs, {break, Result}) -> Result;
pipe([F | Rest], Acc) ->
    pipe(Rest, F(Acc)).

-spec format(term()) -> binary().
format(Term) when is_binary(Term) ->
    Term;
format(Term) ->
    unicode:characters_to_binary(io_lib:format("~64000p", [Term])).

-spec make_ns_opts(atom(), namespace_opts()) -> namespace_opts().
make_ns_opts(NsId, NsOpts) ->
    PresetDefaults = #{
        namespace => erlang:atom_to_binary(NsId),
        retry_policy => ?DEFAULT_RETRY_POLICY,
        worker_pool_size => ?DEFAULT_WORKER_POOL_SIZE,
        process_step_timeout => ?DEFAULT_STEP_TIMEOUT_SEC,
        task_scan_timeout => (?DEFAULT_STEP_TIMEOUT_SEC div 2) + 1,
        last_timer_repair => false
    },
    ConfigDefaults = application:get_env(progressor, defaults, #{}),
    Defaults = maps:merge(PresetDefaults, ConfigDefaults),
    maps:merge(Defaults, NsOpts).

-spec with_observe(_Fun, atom(), [list() | binary()]) -> any().
with_observe(Fun, MetricKey, Labels) ->
    with_observe(Fun, histogram, MetricKey, Labels).

-spec with_observe(_Fun, atom(), atom(), [list() | binary()]) -> any().
with_observe(Fun, MetricType, MetricKey, Labels) ->
    {DurationMicro, Result} = timer:tc(Fun),
    DurationMs = DurationMicro div 1000,
    logger:debug("metric: ~p, labels: ~p, value: ~p", [MetricKey, Labels, DurationMs]),
    collect(MetricType, MetricKey, Labels, DurationMs),
    Result.

collect(histogram, MetricKey, Labels, Value) ->
    prometheus_histogram:observe(MetricKey, Labels, Value).
%%collect(_, _MetricKey, _Labels, _Value) ->
%%    %% TODO implement it
%%    ok.
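pipe/2 threads an accumulator through the list of functions but short-circuits on {error, _} and unwraps {break, _}, which lets a step both stop the pipeline and pick the final value. Two illustrative shell runs:

    1> prg_utils:pipe([fun(X) -> X + 1 end, fun(X) -> {break, X * 2} end, fun(_) -> unreachable end], 1).
    4
    2> prg_utils:pipe([fun(_) -> {error, oops} end, fun(X) -> X + 1 end], 1).
    {error,oops}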
512
src/prg_worker.erl
Normal file
@ -0,0 +1,512 @@
-module(prg_worker).

-behaviour(gen_server).

-include("progressor.hrl").

-export([start_link/3]).
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
    code_change/3]).
-export([handle_continue/2]).

-export([process_task/3]).
-export([continuation_task/3]).
-export([next_task/1]).

-record(prg_worker_state, {ns_id, ns_opts, num, process, sidecar_pid}).

%%%
%%% API
%%%

-spec process_task(pid(), task_header(), task()) -> ok.
process_task(Worker, TaskHeader, #{process_id := _ProcessId, task_id := _TaskId} = Task) ->
    gen_server:cast(Worker, {process_task, TaskHeader, Task}).

-spec continuation_task(pid(), task_header(), task()) -> ok.
continuation_task(Worker, TaskHeader, Task) ->
    gen_server:cast(Worker, {continuation_task, TaskHeader, Task}).

-spec next_task(pid()) -> ok.
next_task(Worker) ->
    gen_server:cast(Worker, next_task).

%%%===================================================================
%%% Spawning and gen_server implementation
%%%===================================================================

start_link(NsId, NsOpts, Num) ->
    gen_server:start_link(?MODULE, [NsId, NsOpts, Num], []).

init([NsId, NsOpts, Num]) ->
    {ok, #prg_worker_state{
        ns_id = NsId,
        ns_opts = NsOpts,
        num = Num
    }, {continue, do_start}}.

handle_continue(do_start, State = #prg_worker_state{ns_id = NsId}) ->
    {ok, Pid} = prg_worker_sidecar:start_link(),
    case prg_scheduler:pop_task(NsId, self()) of
        {TaskHeader, Task} ->
            ok = process_task(self(), TaskHeader, Task);
        not_found ->
            skip
    end,
    {noreply, State#prg_worker_state{sidecar_pid = Pid}}.

handle_call(_Request, _From, State = #prg_worker_state{}) ->
    {reply, ok, State}.

handle_cast(
    {process_task, TaskHeader, Task},
    State = #prg_worker_state{
        ns_id = NsId,
        ns_opts = #{storage := StorageOpts, process_step_timeout := TimeoutSec} = _NsOpts,
        sidecar_pid = Pid
    }
) ->
    Deadline = erlang:system_time(millisecond) + TimeoutSec * 1000,
    ProcessId = maps:get(process_id, Task),
    {ok, Process} = prg_worker_sidecar:get_process(Pid, Deadline, StorageOpts, NsId, ProcessId),
    NewState = do_process_task(TaskHeader, Task, Deadline, State#prg_worker_state{process = Process}),
    {noreply, NewState};
handle_cast(
    {continuation_task, TaskHeader, Task},
    State = #prg_worker_state{
        ns_opts = #{process_step_timeout := TimeoutSec}
    }
) ->
    Deadline = erlang:system_time(millisecond) + TimeoutSec * 1000,
    NewState = do_process_task(TaskHeader, Task, Deadline, State),
    {noreply, NewState};
handle_cast(
    next_task,
    State = #prg_worker_state{
        ns_id = NsId,
        ns_opts = #{storage := StorageOpts, process_step_timeout := TimeoutSec},
        sidecar_pid = Pid
    }
) ->
    NewState =
        case prg_scheduler:pop_task(NsId, self()) of
            {TaskHeader, Task} ->
                Deadline = erlang:system_time(millisecond) + TimeoutSec * 1000,
                ProcessId = maps:get(process_id, Task),
                {ok, Process} = prg_worker_sidecar:get_process(Pid, Deadline, StorageOpts, NsId, ProcessId),
                do_process_task(TaskHeader, Task, Deadline, State#prg_worker_state{process = Process});
            not_found ->
                State
        end,
    {noreply, NewState}.

handle_info(_Info, State = #prg_worker_state{}) ->
    {noreply, State}.

terminate(_Reason, _State = #prg_worker_state{}) ->
    ok.

code_change(_OldVsn, State = #prg_worker_state{}, _Extra) ->
    {ok, State}.

%%%===================================================================
%%% Internal functions
%%%===================================================================

%% remove process by timer
do_process_task(
    _TaskHeader,
    #{task_type := <<"remove">>} = _Task,
    Deadline,
    State = #prg_worker_state{
        ns_id = NsId,
        ns_opts = #{storage := StorageOpts} = NsOpts,
        process = #{process_id := ProcessId} = _Process,
        sidecar_pid = Pid
    }
) ->
    ok = prg_worker_sidecar:lifecycle_sink(Pid, Deadline, NsOpts, remove, ProcessId),
    ok = prg_worker_sidecar:remove_process(Pid, Deadline, StorageOpts, NsId, ProcessId),
    ok = next_task(self()),
    State#prg_worker_state{process = undefined};
do_process_task(
    TaskHeader,
    Task,
    Deadline,
    State = #prg_worker_state{
        ns_id = _NsId,
        ns_opts = NsOpts,
        process = Process,
        sidecar_pid = Pid
    }
) ->
    Args = maps:get(args, Task, <<>>),
    Ctx = maps:get(context, Task, <<>>),
    Request = {extract_task_type(TaskHeader), Args, Process},
    Result = prg_worker_sidecar:process(Pid, Deadline, NsOpts, Request, Ctx),
    handle_result(Result, TaskHeader, Task, Deadline, State).

%% success result with timer
handle_result(
    {ok, #{action := #{set_timer := Timestamp} = Action, events := Events} = Result},
    TaskHeader,
    #{task_id := TaskId, context := Context} = Task,
    Deadline,
    State = #prg_worker_state{
        ns_id = NsId,
        ns_opts = #{storage := StorageOpts} = NsOpts,
        process = #{process_id := ProcessId} = Process,
        sidecar_pid = Pid
    }
) ->
    Now = erlang:system_time(second),
    ProcessUpdated = update_process(maps:without([detail, corrupted_by], Process#{status => <<"running">>}), Result),
    Response = response(maps:get(response, Result, undefined)),
    TaskResult = #{
        task_id => TaskId,
        response => term_to_binary(Response),
        finished_time => Now,
        status => <<"finished">>
    },
    NewTask = maps:merge(
        #{
            process_id => ProcessId,
            task_type => action_to_task_type(Action),
            status => create_status(Timestamp, Now),
            scheduled_time => Timestamp,
            context => Context,
            last_retry_interval => 0,
            attempts_count => 0
        },
        maps:with([metadata], Task)
    ),
    ok = prg_worker_sidecar:lifecycle_sink(Pid, Deadline, NsOpts, extract_task_type(TaskHeader), ProcessId),
    ok = prg_worker_sidecar:event_sink(Pid, Deadline, NsOpts, ProcessId, Events),
    %% just for tests
    ok = maybe_wait_call(application:get_env(progressor, call_wait_timeout, undefined)),
    %%
    SaveResult = prg_worker_sidecar:complete_and_continue(Pid, Deadline, StorageOpts, NsId, TaskResult,
        ProcessUpdated, Events, NewTask),
    _ = maybe_reply(TaskHeader, Response),
    case SaveResult of
        {ok, []} ->
            ok = next_task(self()),
            State#prg_worker_state{process = undefined};
        {ok, [ContinuationTask | _]} ->
            NewHistory = maps:get(history, Process) ++ Events,
            ok = continuation_task(self(), create_header(ContinuationTask), ContinuationTask),
            State#prg_worker_state{process = ProcessUpdated#{history => NewHistory}}
    end;

%% success result with undefined timer and remove action
handle_result(
    {ok, #{action := #{remove := true}} = Result},
    TaskHeader,
    _Task,
    Deadline,
    State = #prg_worker_state{
        ns_id = NsId,
        ns_opts = #{storage := StorageOpts} = NsOpts,
        process = #{process_id := ProcessId} = _Process,
        sidecar_pid = Pid
    }
) ->
    Response = response(maps:get(response, Result, undefined)),
    ok = prg_worker_sidecar:lifecycle_sink(Pid, Deadline, NsOpts, remove, ProcessId),
    ok = prg_worker_sidecar:remove_process(Pid, Deadline, StorageOpts, NsId, ProcessId),
    _ = maybe_reply(TaskHeader, Response),
    ok = next_task(self()),
    State#prg_worker_state{process = undefined};

%% success result with unset_timer action
handle_result(
    {ok, #{events := Events, action := unset_timer} = Result},
    TaskHeader,
    #{task_id := TaskId} = _Task,
    Deadline,
    State = #prg_worker_state{
        ns_id = NsId,
        ns_opts = #{storage := StorageOpts} = NsOpts,
        process = #{process_id := ProcessId} = Process,
        sidecar_pid = Pid
    }
) ->
    ok = prg_worker_sidecar:lifecycle_sink(Pid, Deadline, NsOpts, extract_task_type(TaskHeader), ProcessId),
    ok = prg_worker_sidecar:event_sink(Pid, Deadline, NsOpts, ProcessId, Events),
    ProcessUpdated = update_process(maps:without([detail, corrupted_by], Process#{status => <<"running">>}), Result),
    Response = response(maps:get(response, Result, undefined)),
    TaskResult = #{
        task_id => TaskId,
        response => term_to_binary(Response),
        finished_time => erlang:system_time(second),
        status => <<"finished">>
    },
    SaveResult = prg_worker_sidecar:complete_and_suspend(Pid, Deadline, StorageOpts, NsId, TaskResult,
        ProcessUpdated, Events),
    _ = maybe_reply(TaskHeader, Response),
    case SaveResult of
        {ok, []} ->
            ok = next_task(self()),
            State#prg_worker_state{process = undefined};
        {ok, [ContinuationTask | _]} ->
            NewHistory = maps:get(history, Process) ++ Events,
            ok = continuation_task(self(), create_header(ContinuationTask), ContinuationTask),
            State#prg_worker_state{process = ProcessUpdated#{history => NewHistory}}
    end;

%% success repair with corrupted task and undefined action
handle_result(
    {ok, #{events := Events} = Result},
    {repair, _} = TaskHeader,
    #{task_id := TaskId} = _Task,
    Deadline,
    State = #prg_worker_state{
        ns_id = NsId,
        ns_opts = #{storage := StorageOpts} = NsOpts,
        process = #{process_id := ProcessId, corrupted_by := ErrorTaskId} = Process,
        sidecar_pid = Pid
    }
) ->
    Now = erlang:system_time(second),
    ok = prg_worker_sidecar:lifecycle_sink(Pid, Deadline, NsOpts, extract_task_type(TaskHeader), ProcessId),
    ok = prg_worker_sidecar:event_sink(Pid, Deadline, NsOpts, ProcessId, Events),
    ProcessUpdated = update_process(maps:without([detail, corrupted_by], Process#{status => <<"running">>}), Result),
    Response = response(maps:get(response, Result, undefined)),
    TaskResult = #{
        task_id => TaskId,
        response => term_to_binary(Response),
        finished_time => erlang:system_time(second),
        status => <<"finished">>
    },
    {ok, ErrorTask} = prg_worker_sidecar:get_task(Pid, Deadline, StorageOpts, NsId, ErrorTaskId),
    case ErrorTask of
        #{task_type := Type} when Type =:= <<"timeout">>; Type =:= <<"remove">> ->
            %% machinegun legacy behaviour
            NewTask0 = maps:with([process_id, task_type, scheduled_time, args, metadata, context], ErrorTask),
            NewTask = NewTask0#{
                status => <<"running">>,
                running_time => Now,
                last_retry_interval => 0,
                attempts_count => 0
            },
            {ok, [ContinuationTask | _]} = prg_worker_sidecar:complete_and_continue(Pid, Deadline, StorageOpts, NsId,
                TaskResult, ProcessUpdated, Events, NewTask),
            _ = maybe_reply(TaskHeader, Response),
            NewHistory = maps:get(history, Process) ++ Events,
            ok = continuation_task(self(), create_header(ContinuationTask), ContinuationTask),
            State#prg_worker_state{process = ProcessUpdated#{history => NewHistory}};
        _ ->
            {ok, []} = prg_worker_sidecar:complete_and_unlock(Pid, Deadline, StorageOpts, NsId, TaskResult,
                ProcessUpdated, Events),
            _ = maybe_reply(TaskHeader, Response),
            ok = next_task(self()),
            State#prg_worker_state{process = undefined}
    end;

%% success result with undefined action
handle_result(
    {ok, #{events := Events} = Result},
    TaskHeader,
    #{task_id := TaskId} = _Task,
    Deadline,
    State = #prg_worker_state{
        ns_id = NsId,
        ns_opts = #{storage := StorageOpts} = NsOpts,
        process = #{process_id := ProcessId} = Process,
        sidecar_pid = Pid
    }
) ->
    ok = prg_worker_sidecar:lifecycle_sink(Pid, Deadline, NsOpts, extract_task_type(TaskHeader), ProcessId),
    ok = prg_worker_sidecar:event_sink(Pid, Deadline, NsOpts, ProcessId, Events),
    ProcessUpdated = update_process(maps:without([detail, corrupted_by], Process#{status => <<"running">>}), Result),
    Response = response(maps:get(response, Result, undefined)),
    TaskResult = #{
        task_id => TaskId,
        response => term_to_binary(Response),
        finished_time => erlang:system_time(second),
        status => <<"finished">>
    },
    SaveResult = prg_worker_sidecar:complete_and_unlock(Pid, Deadline, StorageOpts, NsId, TaskResult,
        ProcessUpdated, Events),
    _ = maybe_reply(TaskHeader, Response),
    case SaveResult of
        {ok, []} ->
            ok = next_task(self()),
            State#prg_worker_state{process = undefined};
        {ok, [ContinuationTask | _]} ->
            NewHistory = maps:get(history, Process) ++ Events,
            ok = continuation_task(self(), create_header(ContinuationTask), ContinuationTask),
            State#prg_worker_state{process = ProcessUpdated#{history => NewHistory}}
    end;

%% calls processing error
handle_result(
    {error, Reason} = Response,
    {TaskType, _} = TaskHeader,
    #{task_id := TaskId} = _Task,
    Deadline,
    State = #prg_worker_state{
        ns_id = NsId,
        ns_opts = #{storage := StorageOpts} = NsOpts,
        process = #{process_id := ProcessId} = Process,
        sidecar_pid = Pid
    }
) when
    TaskType =:= init;
    TaskType =:= call;
    TaskType =:= notify;
    TaskType =:= repair
->
    ProcessUpdated = case TaskType of
        repair ->
            Process;
        _ ->
            Detail = prg_utils:format(Reason),
            ok = prg_worker_sidecar:lifecycle_sink(Pid, Deadline, NsOpts, {error, Detail}, ProcessId),
            Process#{status => <<"error">>, detail => Detail}
    end,
    TaskResult = #{
        task_id => TaskId,
        response => term_to_binary(Response),
        finished_time => erlang:system_time(second),
        status => <<"error">>
    },
    ok = prg_worker_sidecar:complete_and_error(Pid, Deadline, StorageOpts, NsId, TaskResult, ProcessUpdated),
    _ = maybe_reply(TaskHeader, Response),
    ok = next_task(self()),
    State#prg_worker_state{process = undefined};

%% timeout/remove processing error
handle_result(
    {error, Reason} = Response,
    {TaskType, _} = TaskHeader,
    #{task_id := TaskId} = Task,
    Deadline,
    State = #prg_worker_state{
        ns_id = NsId,
        ns_opts = #{storage := StorageOpts, retry_policy := RetryPolicy} = NsOpts,
        process = #{process_id := ProcessId} = Process,
        sidecar_pid = Pid
    }
) when TaskType =:= timeout; TaskType =:= remove ->
    TaskResult = #{
        task_id => TaskId,
        response => term_to_binary(Response),
        finished_time => erlang:system_time(second),
        status => <<"error">>
    },
    case check_retryable(TaskHeader, Task, RetryPolicy, Reason) of
        not_retryable ->
            Detail = prg_utils:format(Reason),
            ProcessUpdated = Process#{status => <<"error">>, detail => Detail, corrupted_by => TaskId},
            ok = prg_worker_sidecar:lifecycle_sink(Pid, Deadline, NsOpts, {error, Detail}, ProcessId),
            ok = prg_worker_sidecar:complete_and_error(Pid, Deadline, StorageOpts, NsId, TaskResult, ProcessUpdated);
        NewTask ->
            {ok, _} = prg_worker_sidecar:complete_and_continue(Pid, Deadline, StorageOpts, NsId,
                TaskResult, Process, [], NewTask)
    end,
    ok = next_task(self()),
    State#prg_worker_state{process = undefined}.

update_process(Process, Result) ->
    maps:fold(fun
        (metadata, Meta, Acc) -> Acc#{metadata => Meta};
        (aux_state, AuxState, Acc) -> Acc#{aux_state => AuxState};
        (_, _, Acc) -> Acc
    end, Process, Result).

-spec maybe_reply(task_header(), term()) -> term().
maybe_reply({_, undefined}, _) ->
    undefined;
maybe_reply({_, {Receiver, Ref}}, Response) ->
    progressor:reply(Receiver, {Ref, Response}).

response({error, _} = Error) ->
    Error;
response(undefined) ->
    {ok, ok};
response(Data) ->
    {ok, Data}.

extract_task_type({TaskType, _}) ->
    TaskType.

check_retryable(TaskHeader, #{last_retry_interval := LastInterval} = Task, RetryPolicy, Error) ->
    Now = erlang:system_time(second),
    Timeout =
        case LastInterval =:= 0 of
            true -> maps:get(initial_timeout, RetryPolicy);
            false -> trunc(LastInterval * maps:get(backoff_coefficient, RetryPolicy))
        end,
    Attempts = maps:get(attempts_count, Task) + 1,
    case is_retryable(Error, TaskHeader, RetryPolicy, Timeout, Attempts) of
        true ->
            maps:with(
                [process_id, task_type, status, scheduled_time, args, last_retry_interval, attempts_count, metadata],
                Task#{
                    status => <<"waiting">>,
                    scheduled_time => Now + Timeout,
                    last_retry_interval => Timeout,
                    attempts_count => Attempts
                }
            );
        false ->
            not_retryable
    end.

%% machinegun legacy
is_retryable(
    {exception, _, {woody_error, {_, result_unexpected, _}}} = _Error,
    _TaskHeader,
    _RetryPolicy,
    _Timeout,
    _Attempts
) ->
    false;
is_retryable(
    {exception, _, {woody_error, {_, Class, _}}} = Error,
    {timeout, undefined},
    RetryPolicy,
    Timeout,
    Attempts
) when Class =:= resource_unavailable orelse Class =:= result_unknown ->
    Timeout < maps:get(max_timeout, RetryPolicy, infinity) andalso
        Attempts < maps:get(max_attempts, RetryPolicy, infinity) andalso
        not lists:any(fun(E) -> Error =:= E end, maps:get(non_retryable_errors, RetryPolicy, []));
is_retryable({exception, _, _} = _Error, _TaskHeader, _RetryPolicy, _Timeout, _Attempts) ->
    false;
is_retryable(Error, {timeout, undefined}, RetryPolicy, Timeout, Attempts) ->
    Timeout < maps:get(max_timeout, RetryPolicy, infinity) andalso
        Attempts < maps:get(max_attempts, RetryPolicy, infinity) andalso
        not lists:any(fun(E) -> Error =:= E end, maps:get(non_retryable_errors, RetryPolicy, []));
is_retryable(_Error, _TaskHeader, _RetryPolicy, _Timeout, _Attempts) ->
    false.

create_status(Timestamp, Now) when Timestamp =< Now ->
    <<"running">>;
create_status(_Timestamp, _Now) ->
    <<"waiting">>.

create_header(#{task_type := <<"timeout">>}) ->
    {timeout, undefined};
create_header(#{task_type := <<"call">>}) ->
    {call, undefined};
create_header(#{task_type := <<"remove">>}) ->
    {remove, undefined};
create_header(#{task_type := <<"init">>}) ->
    {init, undefined};
create_header(#{task_type := <<"repair">>}) ->
    {repair, undefined};
create_header(#{task_type := <<"notify">>}) ->
    {notify, undefined}.
%%
action_to_task_type(#{remove := true}) ->
    <<"remove">>;
action_to_task_type(#{set_timer := _}) ->
    <<"timeout">>.

maybe_wait_call(undefined) ->
    ok;
maybe_wait_call(Timeout) ->
    timer:sleep(Timeout).
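check_retryable/4 grows the retry interval geometrically: the first retry waits initial_timeout seconds, each later one waits the previous interval times backoff_coefficient, and retrying stops once the next interval reaches max_timeout or the attempt count reaches max_attempts. A worked example under an assumed policy:

    %% RetryPolicy = #{initial_timeout => 5, backoff_coefficient => 2.0,
    %%                 max_timeout => 60, max_attempts => 10}
    %% retry intervals, in seconds: 5, 10, 20, 40;
    %% the next candidate is 80, which is not < 60, so the task
    %% becomes not_retryable and the process is marked corrupted.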
308
src/prg_worker_sidecar.erl
Normal file
@ -0,0 +1,308 @@
-module(prg_worker_sidecar).

-behaviour(gen_server).

-include("progressor.hrl").

-export([start_link/0]).
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
    code_change/3]).

%% Processor functions wrapper
-export([process/5]).
%% Storage functions wrapper
-export([complete_and_continue/8]).
-export([complete_and_suspend/7]).
-export([complete_and_unlock/7]).
-export([complete_and_error/6]).
-export([remove_process/5]).
%% Notifier functions wrapper
-export([event_sink/5]).
-export([lifecycle_sink/5]).
%%
-export([get_process/5]).
-export([get_task/5]).

-type context() :: binary().
-type args() :: term().
-type request() :: {task_t(), args(), process()}.

-record(prg_sidecar_state, {}).

-define(DEFAULT_DELAY, 3000).
-define(PROCESSING_KEY, progressor_task_processing_duration_ms).
-define(COMPLETION_KEY, progressor_task_completion_duration_ms).
-define(REMOVING_KEY, progressor_process_removing_duration_ms).
-define(NOTIFICATION_KEY, progressor_notification_duration_ms).

-dialyzer({nowarn_function, do_with_retry/2}).

%% API

%% processor wrapper
-spec process(pid(), timestamp_ms(), namespace_opts(), request(), context()) ->
    {ok, _Result} | {error, _Reason} | no_return().
process(Pid, Deadline, #{namespace := NS} = NsOpts, {TaskType, _, _} = Request, Context) ->
    Timeout = Deadline - erlang:system_time(millisecond),
    Fun = fun() -> gen_server:call(Pid, {process, NsOpts, Request, Context}, Timeout) end,
    prg_utils:with_observe(Fun, ?PROCESSING_KEY, [NS, erlang:atom_to_list(TaskType)]).
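
%% NOTE (illustrative): the gen_server call timeout above is derived from the
%% caller's absolute Deadline (milliseconds), so a nearly expired deadline gives
%% a near-zero timeout and the call fails fast instead of overrunning its step.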

%% storage wrappers
-spec complete_and_continue(
    pid(),
    timestamp_ms(),
    storage_opts(),
    namespace_id(),
    task_result(),
    process(),
    [event()],
    task()
) -> {ok, [task()]} | no_return().
complete_and_continue(Pid, _Deadline, StorageOpts, NsId, TaskResult, Process, Events, Task) ->
    %% Timeout = Deadline - erlang:system_time(millisecond),
    Fun = fun() ->
        gen_server:call(
            Pid,
            {complete_and_continue, StorageOpts, NsId, TaskResult, Process, Events, Task},
            infinity
        )
    end,
    prg_utils:with_observe(Fun, ?COMPLETION_KEY, [erlang:atom_to_list(NsId), "complete_and_continue"]).

-spec complete_and_suspend(
    pid(),
    timestamp_ms(),
    storage_opts(),
    namespace_id(),
    task_result(),
    process(),
    [event()]
) -> {ok, [task()]} | no_return().
complete_and_suspend(Pid, _Deadline, StorageOpts, NsId, TaskResult, Process, Events) ->
    %% Timeout = Deadline - erlang:system_time(millisecond),
    Fun = fun() ->
        gen_server:call(
            Pid,
            {complete_and_suspend, StorageOpts, NsId, TaskResult, Process, Events}, infinity
        )
    end,
    prg_utils:with_observe(Fun, ?COMPLETION_KEY, [erlang:atom_to_list(NsId), "complete_and_suspend"]).

-spec complete_and_unlock(
    pid(),
    timestamp_ms(),
    storage_opts(),
    namespace_id(),
    task_result(),
    process(),
    [event()]
) -> {ok, [task()]} | no_return().
complete_and_unlock(Pid, _Deadline, StorageOpts, NsId, TaskResult, Process, Events) ->
    %% Timeout = Deadline - erlang:system_time(millisecond),
    Fun = fun() ->
        gen_server:call(
            Pid,
            {complete_and_unlock, StorageOpts, NsId, TaskResult, Process, Events}, infinity
        )
    end,
    prg_utils:with_observe(Fun, ?COMPLETION_KEY, [erlang:atom_to_list(NsId), "complete_and_unlock"]).

-spec complete_and_error(pid(), timestamp_ms(), storage_opts(), namespace_id(), task_result(), process()) ->
    ok | no_return().
complete_and_error(Pid, _Deadline, StorageOpts, NsId, TaskResult, Process) ->
    %% Timeout = Deadline - erlang:system_time(millisecond),
    Fun = fun() ->
        gen_server:call(
            Pid,
            {complete_and_error, StorageOpts, NsId, TaskResult, Process}, infinity
        )
    end,
    prg_utils:with_observe(Fun, ?COMPLETION_KEY, [erlang:atom_to_list(NsId), "complete_and_error"]).

-spec remove_process(pid(), timestamp_ms(), storage_opts(), namespace_id(), id()) ->
    ok | no_return().
remove_process(Pid, _Deadline, StorageOpts, NsId, ProcessId) ->
    %% Timeout = Deadline - erlang:system_time(millisecond),
    Fun = fun() -> gen_server:call(Pid, {remove_process, StorageOpts, NsId, ProcessId}, infinity) end,
    prg_utils:with_observe(Fun, ?REMOVING_KEY, [erlang:atom_to_list(NsId)]).

%% notifier wrappers

-spec event_sink(pid(), timestamp_ms(), namespace_opts(), id(), [event()]) -> ok | no_return().
event_sink(Pid, Deadline, #{namespace := Ns} = NsOpts, ProcessId, Events) ->
    Timeout = Deadline - erlang:system_time(millisecond),
    Fun = fun() -> gen_server:call(Pid, {event_sink, NsOpts, ProcessId, Events}, Timeout) end,
    prg_utils:with_observe(Fun, ?NOTIFICATION_KEY, [Ns, "event_sink"]).

-spec lifecycle_sink(pid(), timestamp_ms(), namespace_opts(), task_t() | {error, _Reason}, id()) ->
    ok | no_return().
lifecycle_sink(Pid, Deadline, #{namespace := Ns} = NsOpts, TaskType, ProcessId) ->
    Timeout = Deadline - erlang:system_time(millisecond),
    Fun = fun() -> gen_server:call(Pid, {lifecycle_sink, NsOpts, TaskType, ProcessId}, Timeout) end,
    prg_utils:with_observe(Fun, ?NOTIFICATION_KEY, [Ns, "lifecycle_sink"]).

%%

-spec get_process(pid(), timestamp_ms(), storage_opts(), namespace_id(), id()) ->
    {ok, process()} | {error, _Reason}.
get_process(Pid, _Deadline, StorageOpts, NsId, ProcessId) ->
    %% Timeout = Deadline - erlang:system_time(millisecond),
    gen_server:call(Pid, {get_process, StorageOpts, NsId, ProcessId}, infinity).

-spec get_task(pid(), timestamp_ms(), storage_opts(), namespace_id(), task_id()) ->
    {ok, task()} | {error, _Reason}.
get_task(Pid, _Deadline, StorageOpts, NsId, TaskId) ->
    %% Timeout = Deadline - erlang:system_time(millisecond),
    gen_server:call(Pid, {get_task, StorageOpts, NsId, TaskId}, infinity).

%%%===================================================================
%%% Spawning and gen_server implementation
%%%===================================================================

start_link() ->
    gen_server:start_link(?MODULE, [], []).

init([]) ->
    {ok, #prg_sidecar_state{}}.

handle_call(
    {
        process,
        #{processor := #{client := Handler, options := Options}, namespace := _NsName} = _NsOpts,
        Request,
        Ctx
    },
    _From,
    State = #prg_sidecar_state{}
) ->
    Response =
        try Handler:process(Request, Options, Ctx) of
            {ok, _Result} = OK -> OK;
            {error, _Reason} = ERR -> ERR;
            Unsupported ->
                logger:error("processor unexpected result: ~p", [Unsupported]),
                {error, <<"unsupported_result">>}
        catch
            Class:Term:Trace ->
                logger:error("processor exception: ~p", [[Class, Term, Trace]]),
                {error, {exception, Class, Term}}
        end,
    {reply, Response, State};
handle_call(
    {complete_and_continue, StorageOpts, NsId, TaskResult, Process, Events, Task},
    _From,
    State = #prg_sidecar_state{}
) ->
    Fun = fun() ->
        prg_storage:complete_and_continue(StorageOpts, NsId, TaskResult, Process, Events, Task)
    end,
    Response = do_with_retry(Fun, ?DEFAULT_DELAY),
    {reply, Response, State};
handle_call(
    {remove_process, StorageOpts, NsId, ProcessId},
    _From,
    State = #prg_sidecar_state{}
) ->
    Fun = fun() ->
        prg_storage:remove_process(StorageOpts, NsId, ProcessId)
    end,
    Response = do_with_retry(Fun, ?DEFAULT_DELAY),
    {reply, Response, State};
handle_call(
    {get_process, StorageOpts, NsId, ProcessId},
    _From,
    State = #prg_sidecar_state{}
) ->
    Fun = fun() ->
        prg_storage:get_process(StorageOpts, NsId, ProcessId)
    end,
    Response = do_with_retry(Fun, ?DEFAULT_DELAY),
    {reply, Response, State};
handle_call(
    {get_task, StorageOpts, NsId, TaskId},
    _From,
    State = #prg_sidecar_state{}
) ->
    Fun = fun() ->
        prg_storage:get_task(StorageOpts, NsId, TaskId)
    end,
    Response = do_with_retry(Fun, ?DEFAULT_DELAY),
    {reply, Response, State};
handle_call(
    {complete_and_suspend, StorageOpts, NsId, TaskResult, Process, Events},
    _From,
    State = #prg_sidecar_state{}
) ->
    Fun = fun() ->
        prg_storage:complete_and_suspend(StorageOpts, NsId, TaskResult, Process, Events)
    end,
    Response = do_with_retry(Fun, ?DEFAULT_DELAY),
    {reply, Response, State};
handle_call(
    {complete_and_unlock, StorageOpts, NsId, TaskResult, Process, Events},
    _From,
    State = #prg_sidecar_state{}
) ->
    Fun = fun() ->
        prg_storage:complete_and_unlock(StorageOpts, NsId, TaskResult, Process, Events)
    end,
    Response = do_with_retry(Fun, ?DEFAULT_DELAY),
    {reply, Response, State};
handle_call(
    {complete_and_error, StorageOpts, NsId, TaskResult, Process},
    _From,
    State = #prg_sidecar_state{}
) ->
    Fun = fun() ->
        prg_storage:complete_and_error(StorageOpts, NsId, TaskResult, Process)
    end,
    Response = do_with_retry(Fun, ?DEFAULT_DELAY),
    {reply, Response, State};
handle_call({event_sink, NsOpts, ProcessId, Events}, _From, State) ->
    Fun = fun() -> prg_notifier:event_sink(NsOpts, ProcessId, Events) end,
    Response = do_with_retry(Fun, ?DEFAULT_DELAY),
    {reply, Response, State};
handle_call({lifecycle_sink, NsOpts, TaskType, ProcessId}, _From, State) ->
    Fun = fun() -> prg_notifier:lifecycle_sink(NsOpts, TaskType, ProcessId) end,
    Response = do_with_retry(Fun, ?DEFAULT_DELAY),
    {reply, Response, State}.

handle_cast(_Request, State = #prg_sidecar_state{}) ->
    {noreply, State}.

handle_info(_Info, State = #prg_sidecar_state{}) ->
    {noreply, State}.

terminate(_Reason, _State = #prg_sidecar_state{}) ->
    ok.

code_change(_OldVsn, State = #prg_sidecar_state{}, _Extra) ->
    {ok, State}.

%%%===================================================================
%%% Internal functions
%%%===================================================================

do_with_retry(Fun, Delay) ->
    try Fun() of
        ok = Result ->
            Result;
        {ok, _} = Result ->
            Result;
        Error ->
            _ = logger:error("result processing error: ~p", [Error]),
            timer:sleep(Delay),
            do_with_retry(Fun, Delay)
    catch
        Class:Error:Trace ->
            _ = logger:error("result processing exception: ~p", [[Class, Error, Trace]]),
            timer:sleep(Delay),
            do_with_retry(Fun, Delay)
    end.
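
%% NOTE (illustrative): do_with_retry/2 never gives up — any non-ok result or
%% exception is logged and the storage/notifier call is repeated after a fixed
%% ?DEFAULT_DELAY (3000 ms), so completion survives transient backend outages.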
43
src/prg_worker_sup.erl
Normal file
@ -0,0 +1,43 @@
-module(prg_worker_sup).

-behaviour(supervisor).

%% API
-export([start_link/1]).

%% Supervisor callbacks
-export([init/1]).

-define(SERVER, ?MODULE).

%%%===================================================================
%%% API functions
%%%===================================================================

-spec(start_link(term()) -> {ok, Pid :: pid()} | ignore | {error, Reason :: term()}).
start_link({NsId, _} = NS) ->
    RegName = prg_utils:registered_name(NsId, "_worker_sup"),
    supervisor:start_link({local, RegName}, ?MODULE, NS).

%%%===================================================================
%%% Supervisor callbacks
%%%===================================================================

init({NsId, NsOpts}) ->
    MaxRestarts = 1000,
    MaxSecondsBetweenRestarts = 3600,
    SupFlags = #{
        strategy => simple_one_for_one,
        intensity => MaxRestarts,
        period => MaxSecondsBetweenRestarts
    },
    ChildSpecs = [
        #{
            id => prg_utils:registered_name(NsId, "_prg_worker"),
            start => {prg_worker, start_link, [NsId, NsOpts]}
        }
    ],
    {ok, {SupFlags, ChildSpecs}}.

%%%===================================================================
%%% Internal functions
%%%===================================================================
22
src/progressor.app.src
Normal file
@ -0,0 +1,22 @@
{application, progressor, [
    {description, "An OTP application"},
    {vsn, "0.1.0"},
    {registered, []},
    {mod, {progressor_app, []}},
    {applications, [
        kernel,
        stdlib,
        jsx,
        herd,
        prometheus,
        epg_connector,
        thrift,
        mg_proto,
        brod
    ]},
    {env, []},
    {modules, []},
    {licenses, ["MIT"]},
    {links, []}
]}.
259
src/progressor.erl
Normal file
@ -0,0 +1,259 @@
-module(progressor).

-include("progressor.hrl").

-define(TASK_REPEAT_REQUEST_TIMEOUT, 1000).
-define(PREPARING_KEY, progressor_request_preparing_duration_ms).

%% Public API
-export([init/1]).
-export([call/1]).
-export([repair/1]).
-export([get/1]).
%% TODO
%% -export([remove/1]).

%% Internal API
-export([reply/2]).

%-ifdef(TEST).
-export([cleanup/1]).
%-endif.

-type request() :: #{
    ns := namespace_id(),
    id := id(),
    args => term(),
    idempotency_key => binary(),
    context => binary()
}.
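
%% Example request (illustrative; the process id and key are assumptions):
%%   progressor:call(#{ns => 'default/default', id => <<"proc-1">>,
%%                     args => <<"call_args">>, idempotency_key => <<"req-42">>}).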

%% see receive blocks below in this module
-spec reply(pid(), term()) -> term().
reply(Pid, Msg) ->
    Pid ! Msg.

%% API
-spec init(request()) -> {ok, _Result} | {error, _Reason}.
init(Req) ->
    prg_utils:pipe([
        fun add_ns_opts/1,
        fun check_idempotency/1,
        fun add_task/1,
        fun(Data) -> prepare(fun prg_storage:prepare_init/4, Data) end,
        fun process_call/1
    ], Req#{type => init}).

-spec call(request()) -> {ok, _Result} | {error, _Reason}.
call(Req) ->
    prg_utils:pipe([
        fun add_ns_opts/1,
        fun check_idempotency/1,
        fun(Data) -> check_process_status(Data, <<"running">>) end,
        fun add_task/1,
        fun(Data) -> prepare(fun prg_storage:prepare_call/4, Data) end,
        fun process_call/1
    ], Req#{type => call}).

-spec repair(request()) -> {ok, _Result} | {error, _Reason}.
repair(Req) ->
    prg_utils:pipe([
        fun add_ns_opts/1,
        fun check_idempotency/1,
        fun(Data) -> check_process_status(Data, <<"error">>) end,
        fun add_task/1,
        fun(Data) -> prepare(fun prg_storage:prepare_repair/4, Data) end,
        fun process_call/1
    ], Req#{type => repair}).

-spec get(request()) -> {ok, _Result} | {error, _Reason}.
get(Req) ->
    prg_utils:pipe([
        fun add_ns_opts/1,
        fun do_get_request/1
    ], Req).

%-ifdef(TEST).

-spec cleanup(_) -> _.
cleanup(Opts) ->
    prg_utils:pipe([
        fun add_ns_opts/1,
        fun cleanup_storage/1
    ], Opts).

cleanup_storage(#{ns := NsId, ns_opts := #{storage := StorageOpts}}) ->
    ok = prg_storage:cleanup(StorageOpts, NsId).

%-endif.

%% Internal functions

add_ns_opts(#{ns := NsId} = Opts) ->
    NSs = application:get_env(progressor, namespaces, #{}),
    case maps:get(NsId, NSs, undefined) of
        undefined ->
            {error, <<"namespace not found">>};
        NsOpts ->
            Opts#{ns_opts => prg_utils:make_ns_opts(NsId, NsOpts)}
    end.

check_idempotency(#{idempotency_key := _IdempotencyKey} = Req) ->
    case get_task_result(Req) of
        not_found -> Req;
        Result -> {break, Result}
    end;
check_idempotency(Req) ->
    Req.

add_task(#{id := Id, args := Args, type := Type} = Opts) ->
    Context = maps:get(context, Opts, <<>>),
    TaskData = #{
        process_id => Id,
        args => Args,
        task_type => convert_task_type(Type),
        context => Context
    },
    Task = make_task(maybe_add_idempotency(TaskData, maps:get(idempotency_key, Opts, undefined))),
    Opts#{task => Task}.

check_process_status(#{ns_opts := #{storage := StorageOpts}, id := Id, ns := NsId} = Opts, ExpectedStatus) ->
    case prg_storage:get_process_status(StorageOpts, NsId, Id) of
        {ok, ExpectedStatus} -> Opts;
        {ok, OtherStatus} -> {error, <<"process is ", OtherStatus/binary>>};
        {error, _} = Error -> Error
    end.

prepare(Fun, #{ns_opts := #{storage := StorageOpts} = NsOpts, ns := NsId, id := ProcessId, task := Task} = Req) ->
    Worker = capture_worker(NsId),
    TaskStatus = check_for_run(Worker),
    TaskType = maps:get(task_type, Task),
    PrepareResult = prg_utils:with_observe(
        fun() -> Fun(StorageOpts, NsId, ProcessId, Task#{status => TaskStatus}) end,
        ?PREPARING_KEY,
        [erlang:atom_to_binary(NsId, utf8), TaskType]
    ),
    case PrepareResult of
        {ok, {continue, TaskId}} ->
            Req#{task => Task#{task_id => TaskId}, worker => Worker};
        {ok, {postpone, TaskId}} ->
            ok = return_worker(NsId, Worker),
            TimeoutSec = maps:get(process_step_timeout, NsOpts, ?DEFAULT_STEP_TIMEOUT_SEC),
            Timeout = TimeoutSec * 1000,
            case await_task_result(StorageOpts, NsId, {task_id, TaskId}, Timeout, 0) of
                {ok, _Result} = OK ->
                    {break, OK};
                {error, _Reason} = ERR ->
                    ERR
            end;
        {error, _} = Error ->
            Error
    end.

get_task_result(#{ns_opts := #{storage := StorageOpts} = NsOpts, ns := NsId, idempotency_key := IdempotencyKey}) ->
    case prg_storage:get_task_result(StorageOpts, NsId, {idempotency_key, IdempotencyKey}) of
        {ok, Result} ->
            Result;
        {error, not_found} ->
            not_found;
        {error, in_progress} ->
            TimeoutSec = maps:get(process_step_timeout, NsOpts, ?DEFAULT_STEP_TIMEOUT_SEC),
            Timeout = TimeoutSec * 1000,
            await_task_result(StorageOpts, NsId, {idempotency_key, IdempotencyKey}, Timeout, 0)
    end.

await_task_result(_StorageOpts, _NsId, _KeyOrId, Timeout, Duration) when Duration > Timeout ->
    {error, <<"timeout">>};
await_task_result(StorageOpts, NsId, KeyOrId, Timeout, Duration) ->
    case prg_storage:get_task_result(StorageOpts, NsId, KeyOrId) of
        {ok, Result} ->
            Result;
        {error, _} ->
            timer:sleep(?TASK_REPEAT_REQUEST_TIMEOUT),
            await_task_result(StorageOpts, NsId, KeyOrId, Timeout, Duration + ?TASK_REPEAT_REQUEST_TIMEOUT)
    end.
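
%% NOTE (illustrative): await_task_result/5 polls prg_storage:get_task_result/3
%% every ?TASK_REPEAT_REQUEST_TIMEOUT (1000 ms) until the accumulated Duration
%% exceeds Timeout, i.e. at most ~process_step_timeout seconds of waiting.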

do_get_request(#{ns_opts := #{storage := StorageOpts}, id := Id, ns := NsId, args := HistoryRange}) ->
    prg_storage:get_process(external, StorageOpts, NsId, Id, HistoryRange);
do_get_request(#{ns_opts := #{storage := StorageOpts}, id := Id, ns := NsId}) ->
    prg_storage:get_process(external, StorageOpts, NsId, Id, #{}).

process_call(#{ns_opts := NsOpts, ns := NsId, type := Type, task := Task, worker := Worker}) ->
    TimeoutSec = maps:get(process_step_timeout, NsOpts, ?DEFAULT_STEP_TIMEOUT_SEC),
    Timeout = TimeoutSec * 1000,
    Ref = make_ref(),
    TaskHeader = make_task_header(Type, Ref),
    ok = prg_worker:process_task(Worker, TaskHeader, Task),
    ok = prg_scheduler:release_worker(NsId, self(), Worker),
    %% see fun reply/2
    receive
        {Ref, Result} ->
            Result
    after Timeout ->
        {error, <<"timeout">>}
    end.

make_task_header(init, Ref) ->
    {init, {self(), Ref}};
make_task_header(call, Ref) ->
    {call, {self(), Ref}};
make_task_header(repair, Ref) ->
    {repair, {self(), Ref}};
make_task_header(notify, _Ref) ->
    {notify, undefined}.

convert_task_type(init) ->
    <<"init">>;
convert_task_type(call) ->
    <<"call">>;
convert_task_type(notify) ->
    <<"notify">>;
convert_task_type(repair) ->
    <<"repair">>.

maybe_add_idempotency(Task, undefined) ->
    Task;
maybe_add_idempotency(Task, IdempotencyKey) ->
    Task#{idempotency_key => IdempotencyKey}.

make_task(#{task_type := TaskType} = TaskData) when
    TaskType =:= <<"init">>;
    TaskType =:= <<"call">>;
    TaskType =:= <<"repair">>
->
    Now = erlang:system_time(second),
    Defaults = #{
        status => <<"running">>,
        scheduled_time => Now,
        running_time => Now,
        last_retry_interval => 0,
        attempts_count => 0
    },
    maps:merge(Defaults, TaskData);
make_task(#{task_type := <<"notify">>} = TaskData) ->
    Now = erlang:system_time(second),
    Defaults = #{
        status => <<"running">>,
        scheduled_time => Now,
        running_time => Now,
        response => term_to_binary({ok, ok}),
        last_retry_interval => 0,
        attempts_count => 0
    },
    maps:merge(Defaults, TaskData).
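
%% NOTE (illustrative): a new <<"init">>/<<"call">>/<<"repair">> task defaults to
%% #{status => <<"running">>, attempts_count => 0, last_retry_interval => 0, ...};
%% a <<"notify">> task also pre-stores response => term_to_binary({ok, ok}),
%% since no caller ever waits for a notification result.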

capture_worker(NsId) ->
    case prg_scheduler:capture_worker(NsId, self()) of
        {ok, Worker} -> Worker;
        {error, _} -> undefined
    end.

return_worker(_NsId, undefined) ->
    ok;
return_worker(NsId, Worker) ->
    prg_scheduler:return_worker(NsId, self(), Worker).

check_for_run(undefined) ->
    <<"waiting">>;
check_for_run(Pid) when is_pid(Pid) ->
    <<"running">>.
77
src/progressor_app.erl
Normal file
@ -0,0 +1,77 @@
%%%-------------------------------------------------------------------
%% @doc progressor public API
%% @end
%%%-------------------------------------------------------------------

-module(progressor_app).

-behaviour(application).

-export([start/2, stop/1]).

start(_StartType, _StartArgs) ->
    create_metrics(),
    progressor_sup:start_link().

stop(_State) ->
    ok.

%% internal functions

create_metrics() ->
    _ = prometheus_histogram:new([
        {name, progressor_calls_scanning_duration_ms},
        {help, "Calls (call, repair) scanning durations in milliseconds"},
        {buckets, [50, 150, 300, 500, 750, 1000]},
        {labels, [namespace]}
    ]),
    _ = prometheus_histogram:new([
        {name, progressor_timers_scanning_duration_ms},
        {help, "Timers (timeout, remove) scanning durations in milliseconds"},
        {buckets, [50, 150, 300, 500, 750, 1000]},
        {labels, [namespace]}
    ]),
    _ = prometheus_histogram:new([
        {name, progressor_zombie_collection_duration_ms},
        {help, "Zombie tasks collecting durations in milliseconds"},
        {buckets, [50, 150, 300, 500, 750, 1000]},
        {labels, [namespace]}
    ]),
    _ = prometheus_histogram:new([
        {name, progressor_request_preparing_duration_ms},
        {help, "Preparing request (init, call, repair) durations in milliseconds"},
        {buckets, [50, 150, 300, 500, 750, 1000]},
        {labels, [namespace, task_type]}
    ]),
    _ = prometheus_histogram:new([
        {name, progressor_task_processing_duration_ms},
        {help, "Task processing durations in milliseconds"},
        {buckets, [50, 150, 300, 500, 750, 1000]},
        {labels, [namespace, task_type]}
    ]),
    _ = prometheus_histogram:new([
        {name, progressor_task_completion_duration_ms},
        {help, "Task completion durations in milliseconds"},
        {buckets, [50, 150, 300, 500, 750, 1000]},
        {labels, [namespace, completion_type]}
    ]),
    _ = prometheus_histogram:new([
        {name, progressor_process_removing_duration_ms},
        {help, "Process removing durations in milliseconds"},
        {buckets, [50, 150, 300, 500, 750, 1000]},
        {labels, [namespace]}
    ]),
    _ = prometheus_histogram:new([
        {name, progressor_notification_duration_ms},
        {help, "Notification durations in milliseconds"},
        {buckets, [10, 50, 150, 300, 500, 1000]},
        {labels, [namespace, notification_type]}
    ]).
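
%% NOTE (illustrative): all histograms above use explicit buckets up to 1000 ms;
%% Prometheus histograms also keep an implicit +Inf bucket, so slower observations
%% are still counted, just without finer resolution.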
40
src/progressor_sup.erl
Normal file
@ -0,0 +1,40 @@
-module(progressor_sup).

-behaviour(supervisor).

-include("progressor.hrl").

-export([start_link/0]).

-export([init/1]).

-define(SERVER, ?MODULE).

start_link() ->
    supervisor:start_link({local, ?SERVER}, ?MODULE, []).

init([]) ->
    SupFlags = #{
        strategy => one_for_one,
        intensity => 0,
        period => 1
    },
    ChildSpecs = maps:fold(
        fun(ID, NsOpts, Acc) ->
            FullOpts = prg_utils:make_ns_opts(ID, NsOpts),
            NS = {ID, FullOpts},
            [
                #{
                    id => ID,
                    start => {prg_namespace_sup, start_link, [NS]},
                    type => supervisor
                } | Acc
            ]
        end,
        [],
        namespaces()
    ),
    {ok, {SupFlags, ChildSpecs}}.

%% internal functions

namespaces() ->
    application:get_env(progressor, namespaces, #{}).
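
%% Example (illustrative) application environment consumed by namespaces/0:
%%   {progressor, [{namespaces, #{'default/default' => #{processor => ...}}}]}
%% One prg_namespace_sup child is started per configured namespace.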
1059
src/storage/postgres/prg_pg_backend.erl
Normal file
File diff suppressed because it is too large
799
test/prg_base_SUITE.erl
Normal file
@ -0,0 +1,799 @@
-module(prg_base_SUITE).

-include_lib("stdlib/include/assert.hrl").

%% API
-export([
    init_per_suite/1,
    end_per_suite/1,
    all/0
]).

%% Tests
-export([simple_timers_test/1]).
-export([simple_call_test/1]).
-export([call_replace_timer_test/1]).
-export([call_unset_timer_test/1]).
-export([postponed_call_test/1]).
-export([postponed_call_to_suspended_process_test/1]).
-export([multiple_calls_test/1]).
-export([repair_after_non_retriable_error_test/1]).
-export([error_after_max_retries_test/1]).
-export([repair_after_call_error_test/1]).
-export([remove_by_timer_test/1]).
-export([remove_without_timer_test/1]).

-define(NS, 'default/default').

init_per_suite(Config) ->
    Config.

end_per_suite(_Config) ->
    ok.

all() -> [
    simple_timers_test,
    simple_call_test,
    call_replace_timer_test,
    call_unset_timer_test,
    postponed_call_test,
    postponed_call_to_suspended_process_test,
    multiple_calls_test,
    repair_after_non_retriable_error_test,
    error_after_max_retries_test,
    repair_after_call_error_test,
    remove_by_timer_test,
    remove_without_timer_test
].

-spec simple_timers_test(_) -> _.
simple_timers_test(_C) ->
    %% steps:
    %% step          aux_state    events    action
    %% 1. init    -> aux_state1,  [event1], timer 2s
    %% 2. timeout -> aux_state2,  [event2], timer 0s
    %% 3. timeout -> undefined,   [],       undefined
    _ = mock_processor(simple_timers_test),
    Id = gen_id(),
    {ok, ok} = progressor:init(#{ns => ?NS, id => Id, args => <<"init_args">>}),
    3 = expect_steps_counter(3),
    ExpectedAux = erlang:term_to_binary(<<"aux_state2">>),
    {ok, #{
        process_id := Id,
        status := <<"running">>,
        aux_state := ExpectedAux,
        metadata := #{<<"k">> := <<"v">>},
        history := [
            #{
                event_id := 1,
                metadata := #{<<"format_version">> := 1},
                payload := _Pl1,
                timestamp := _Ts1
            },
            #{
                event_id := 2,
                metadata := #{<<"format_version">> := 1},
                payload := _Pl2,
                timestamp := _Ts2
            }
        ]
    }} = progressor:get(#{ns => ?NS, id => Id}),
    unmock_processor(),
    ok.
%%
-spec simple_call_test(_) -> _.
simple_call_test(_C) ->
    %% steps:
    %% 1. init    -> [event1], timer 2s
    %% 2. call    -> [event2], undefined (duration 3s)
    %% 3. timeout -> [event3], undefined
    _ = mock_processor(simple_call_test),
    Id = gen_id(),
    {ok, ok} = progressor:init(#{ns => ?NS, id => Id, args => <<"init_args">>}),
    {ok, <<"response">>} = progressor:call(#{ns => ?NS, id => Id, args => <<"call_args">>}),
    3 = expect_steps_counter(3),
    {ok, #{
        process_id := Id,
        status := <<"running">>,
        history := [
            #{
                event_id := 1,
                metadata := #{<<"format_version">> := 1},
                payload := _Pl1,
                timestamp := _Ts1
            },
            #{
                event_id := 2,
                metadata := #{<<"format_version">> := 1},
                payload := _Pl2,
                timestamp := _Ts2
            },
            #{
                event_id := 3,
                metadata := #{<<"format_version">> := 1},
                payload := _Pl3,
                timestamp := _Ts3
            }
        ]
    }} = progressor:get(#{ns => ?NS, id => Id}),
    ok.
%%
-spec call_replace_timer_test(_) -> _.
call_replace_timer_test(_C) ->
    %% steps:
    %% 1. init    -> [event1], timer 2s + remove
    %% 2. call    -> [],       timer 0s (the new timer cancels the remove)
    %% 3. timeout -> [event2], undefined
    _ = mock_processor(call_replace_timer_test),
    Id = gen_id(),
    {ok, ok} = progressor:init(#{ns => ?NS, id => Id, args => <<"init_args">>}),
    {ok, <<"response">>} = progressor:call(#{ns => ?NS, id => Id, args => <<"call_args">>}),
    3 = expect_steps_counter(3),
    %% wait task_scan_timeout: the cancelled remove must not fire
    timer:sleep(4000),
    {ok, #{
        process_id := Id,
        status := <<"running">>,
        history := [
            #{
                event_id := 1,
                metadata := #{<<"format_version">> := 1},
                payload := _Pl1,
                timestamp := _Ts1
            },
            #{
                event_id := 2,
                metadata := #{<<"format_version">> := 1},
                payload := _Pl2,
                timestamp := _Ts2
            }
        ]
    }} = progressor:get(#{ns => ?NS, id => Id}),
    ok.
%%
-spec call_unset_timer_test(_) -> _.
call_unset_timer_test(_C) ->
    %% steps:
    %% 1. init -> [event1], timer 2s
    %% 2. call -> [], unset_timer
    _ = mock_processor(call_unset_timer_test),
    Id = gen_id(),
    {ok, ok} = progressor:init(#{ns => ?NS, id => Id, args => <<"init_args">>}),
    {ok, <<"response">>} = progressor:call(#{ns => ?NS, id => Id, args => <<"call_args">>}),
    %% waiting for 3 steps, but only 2 happen: the timer was unset
    2 = expect_steps_counter(3),
    {ok, #{
        process_id := Id,
        status := <<"running">>,
        history := [
            #{
                event_id := 1,
                metadata := #{<<"format_version">> := 1},
                payload := _Pl1,
                timestamp := _Ts1
            }
        ]
    }} = progressor:get(#{ns => ?NS, id => Id}),
    ok.
%%
-spec postponed_call_test(_) -> _.
postponed_call_test(_C) ->
    %% call between 0 sec timers
    %% steps:
    %% 1. init    -> [],       timer 0s
    %% 2. timeout -> [event1], timer 0s (process duration 3000)
    %% 3. call    -> [event2], undefined
    %% 4. timeout -> [event3], undefined
    _ = mock_processor(postponed_call_test),
    Id = gen_id(),
    {ok, ok} = progressor:init(#{ns => ?NS, id => Id, args => <<"init_args">>}),
    {ok, <<"response">>} = progressor:call(#{ns => ?NS, id => Id, args => <<"call_args">>}),
    4 = expect_steps_counter(4),
    {ok, #{
        process_id := Id,
        status := <<"running">>,
        history := [
            #{
                event_id := 1,
                metadata := #{<<"format_version">> := 1},
                payload := _Pl1,
                timestamp := _Ts1
            },
            #{
                event_id := 2,
                metadata := #{<<"format_version">> := 1},
                payload := _Pl2,
                timestamp := _Ts2
            },
            #{
                event_id := 3,
                metadata := #{<<"format_version">> := 1},
                payload := _Pl3,
                timestamp := _Ts3
            }
        ]
    }} = progressor:get(#{ns => ?NS, id => Id}),
    ok.
%%
-spec postponed_call_to_suspended_process_test(_) -> _.
postponed_call_to_suspended_process_test(_C) ->
    %% call between 0 sec timers
    %% steps:
    %% 1. init    -> [],       timer 0s
    %% 2. timeout -> [event1], undefined (process duration 3000)
    %% 3. call    -> [event2], undefined
    _ = mock_processor(postponed_call_to_suspended_process_test),
    Id = gen_id(),
    {ok, ok} = progressor:init(#{ns => ?NS, id => Id, args => <<"init_args">>}),
    {ok, <<"response">>} = progressor:call(#{ns => ?NS, id => Id, args => <<"call_args">>}),
    3 = expect_steps_counter(3),
    {ok, #{
        process_id := Id,
        status := <<"running">>,
        history := [
            #{
                event_id := 1,
                metadata := #{<<"format_version">> := 1},
                payload := _Pl1,
                timestamp := _Ts1
            },
            #{
                event_id := 2,
                metadata := #{<<"format_version">> := 1},
                payload := _Pl2,
                timestamp := _Ts2
            }
        ]
    }} = progressor:get(#{ns => ?NS, id => Id}),
    ok.
%%
-spec multiple_calls_test(_) -> _.
multiple_calls_test(_C) ->
    %% steps:
    %% 1.  init -> [],        undefined
    %% 2.  call -> [event1],  undefined
    %% ...
    %% 11. call -> [event10], undefined
    _ = mock_processor(multiple_calls_test),
    Id = gen_id(),
    {ok, ok} = progressor:init(#{ns => ?NS, id => Id, args => <<"init_args">>}),
    lists:foreach(
        fun(N) ->
            spawn(progressor, call, [#{ns => ?NS, id => Id, args => <<N>>}])
        end,
        lists:seq(1, 10)
    ),
    11 = expect_steps_counter(33),
    {ok, #{
        process_id := Id,
        status := <<"running">>,
        history := [
            #{event_id := 1},
            #{event_id := 2},
            #{event_id := 3},
            #{event_id := 4},
            #{event_id := 5},
            #{event_id := 6},
            #{event_id := 7},
            #{event_id := 8},
            #{event_id := 9},
            #{event_id := 10}
        ]
    }} = progressor:get(#{ns => ?NS, id => Id}),
    ok.

-spec repair_after_non_retriable_error_test(_) -> _.
repair_after_non_retriable_error_test(_C) ->
    %% steps:
    %% 1. init    -> [], timer 0s
    %% 2. timeout -> {error, do_not_retry}
    %% 3. repair  -> [event1], undefined
    %% 4. timeout -> [event2], undefined
    _ = mock_processor(repair_after_non_retriable_error_test),
    Id = gen_id(),
    {ok, ok} = progressor:init(#{ns => ?NS, id => Id, args => <<"init_args">>}),
    2 = expect_steps_counter(2),
    {ok, #{
        detail := <<"do_not_retry">>,
        history := [],
        process_id := Id,
        status := <<"error">>
    }} = progressor:get(#{ns => ?NS, id => Id}),
    {ok, ok} = progressor:repair(#{ns => ?NS, id => Id, args => <<"repair_args">>}),
    4 = expect_steps_counter(4),
    {ok, #{
        process_id := Id,
        status := <<"running">>,
        history := [
            #{
                event_id := 1,
                metadata := #{<<"format_version">> := 1},
                payload := _Pl1,
                timestamp := _Ts1
            },
            #{
                event_id := 2,
                metadata := #{<<"format_version">> := 1},
                payload := _Pl2,
                timestamp := _Ts2
            }
        ]
    } = Process} = progressor:get(#{ns => ?NS, id => Id}),
    false = erlang:is_map_key(detail, Process),
    ok.
%%
-spec error_after_max_retries_test(_) -> _.
error_after_max_retries_test(_C) ->
    %% steps:
    %% 1. init    -> [], timer 0s
    %% 2. timeout -> {error, retry_this}
    %% 3. timeout -> {error, retry_this}
    %% 4. timeout -> {error, retry_this}
    _ = mock_processor(error_after_max_retries_test),
    Id = gen_id(),
    {ok, ok} = progressor:init(#{ns => ?NS, id => Id, args => <<"init_args">>}),
    4 = expect_steps_counter(4),
    {ok, #{
        detail := <<"retry_this">>,
        history := [],
        process_id := Id,
        status := <<"error">>
    }} = progressor:get(#{ns => ?NS, id => Id}),
    ok.
%%
-spec repair_after_call_error_test(_) -> _.
repair_after_call_error_test(_C) ->
    %% steps:
    %% 1. init   -> [], undefined
    %% 2. call   -> {error, retry_this}
    %% 3. repair -> {error, repair_error}
    %% 4. repair -> [event1], undefined
    _ = mock_processor(repair_after_call_error_test),
    Id = gen_id(),
    {ok, ok} = progressor:init(#{ns => ?NS, id => Id, args => <<"init_args">>}),
    {error, retry_this} = progressor:call(#{ns => ?NS, id => Id, args => <<"call_args">>}),
    2 = expect_steps_counter(2),
    {ok, #{
        detail := <<"retry_this">>,
        metadata := #{<<"k">> := <<"v">>},
        history := [],
        process_id := Id,
        status := <<"error">>
    }} = progressor:get(#{ns => ?NS, id => Id}),
    {error, <<"repair_error">>} = progressor:repair(#{ns => ?NS, id => Id, args => <<"bad_repair_args">>}),
    3 = expect_steps_counter(3),
    %% should not rewrite detail
    {ok, #{
        detail := <<"retry_this">>,
        metadata := #{<<"k">> := <<"v">>},
        history := [],
        process_id := Id,
        status := <<"error">>
    }} = progressor:get(#{ns => ?NS, id => Id}),
    {ok, ok} = progressor:repair(#{ns => ?NS, id => Id, args => <<"repair_args">>}),
    4 = expect_steps_counter(4),
    {ok, #{
        process_id := Id,
        status := <<"running">>,
        metadata := #{<<"k2">> := <<"v2">>},
        history := [
            #{
                event_id := 1,
                metadata := #{<<"format_version">> := 1},
                payload := _Pl1,
                timestamp := _Ts1
            }
        ]
    }} = progressor:get(#{ns => ?NS, id => Id}),
    ok.
%%
-spec remove_by_timer_test(_) -> _.
remove_by_timer_test(_C) ->
    %% steps:
    %% 1. init -> [event1, event2], timer 2s + remove
    _ = mock_processor(remove_by_timer_test),
    Id = gen_id(),
    {ok, ok} = progressor:init(#{ns => ?NS, id => Id, args => <<"init_args">>}),
    {ok, #{
        process_id := Id,
        status := <<"running">>,
        history := [
            #{
                event_id := 1,
                metadata := #{<<"format_version">> := 1},
                payload := _Pl1,
                timestamp := _Ts1
            },
            #{
                event_id := 2,
                metadata := #{<<"format_version">> := 1},
                payload := _Pl2,
                timestamp := _Ts2
            }
        ]
    }} = progressor:get(#{ns => ?NS, id => Id}),
    %% wait task_scan_timeout
    timer:sleep(4000),
    {error, <<"process not found">>} = progressor:get(#{ns => ?NS, id => Id}),
    ok.
%%
-spec remove_without_timer_test(_) -> _.
remove_without_timer_test(_C) ->
    %% steps:
    %% 1. init    -> [event1], timer 2s
    %% 2. timeout -> [], remove
    _ = mock_processor(remove_without_timer_test),
    Id = gen_id(),
    {ok, ok} = progressor:init(#{ns => ?NS, id => Id, args => <<"init_args">>}),
    {ok, #{
        process_id := Id,
        status := <<"running">>,
        history := [
            #{
                event_id := 1,
                metadata := #{<<"format_version">> := 1},
                payload := _Pl1,
                timestamp := _Ts1
            }
        ]
    }} = progressor:get(#{ns => ?NS, id => Id}),
    2 = expect_steps_counter(2),
    {error, <<"process not found">>} = progressor:get(#{ns => ?NS, id => Id}),
    ok.

%%%%%%%%%%%%%%%%%%%%%
%% Internal functions
%%%%%%%%%%%%%%%%%%%%%

mock_processor(simple_timers_test = TestCase) ->
    Self = self(),
    MockProcessor = fun({_Type, _Args, #{history := History} = _Process}, _Opts, _Ctx) ->
        case erlang:length(History) of
            0 ->
                Result = #{
                    events => [event(1)],
                    metadata => #{<<"k">> => <<"v">>},
                    %% postponed timer
                    action => #{set_timer => erlang:system_time(second) + 2},
                    aux_state => erlang:term_to_binary(<<"aux_state1">>)
                },
                Self ! 1,
                {ok, Result};
            1 ->
                Result = #{
                    events => [event(2)],
                    %% continuation timer
                    action => #{set_timer => erlang:system_time(second)},
                    aux_state => erlang:term_to_binary(<<"aux_state2">>)
                },
                Self ! 2,
                {ok, Result};
            _ ->
                Result = #{
                    events => []
                },
                Self ! 3,
                {ok, Result}
        end
    end,
    mock_processor(TestCase, MockProcessor);
%%
mock_processor(simple_call_test = TestCase) ->
    Self = self(),
    MockProcessor = fun
        ({init, <<"init_args">>, _Process}, _Opts, _Ctx) ->
            Result = #{
                events => [event(1)],
                action => #{set_timer => erlang:system_time(second) + 2}
            },
            Self ! 1,
            {ok, Result};
        ({call, <<"call_args">>, _Process}, _Opts, _Ctx) ->
            %% call while the process is suspended (waiting for its timeout)
            timer:sleep(3000),
            Result = #{
                response => <<"response">>,
                events => [event(2)]
            },
            Self ! 2,
            {ok, Result};
        ({timeout, <<>>, #{history := History} = _Process}, _Opts, _Ctx) ->
            %% timeout after call processing
            ?assertEqual(2, erlang:length(History)),
            Result = #{
                events => [event(3)]
            },
            Self ! 3,
            {ok, Result}
    end,
    mock_processor(TestCase, MockProcessor);
%%
mock_processor(call_replace_timer_test = TestCase) ->
    Self = self(),
    MockProcessor = fun
        ({init, <<"init_args">>, _Process}, _Opts, _Ctx) ->
            Result = #{
                events => [event(1)],
                action => #{set_timer => erlang:system_time(second) + 2, remove => true}
            },
            Self ! 1,
            {ok, Result};
        ({call, <<"call_args">>, _Process}, _Opts, _Ctx) ->
            %% call while the process is suspended (waiting for its timeout)
            Result = #{
                response => <<"response">>,
                events => [],
                action => #{set_timer => erlang:system_time(second)}
            },
            Self ! 2,
            {ok, Result};
        ({timeout, <<>>, #{history := History} = _Process}, _Opts, _Ctx) ->
            %% timeout after call processing (the remove action was cancelled by the call action)
            ?assertEqual(1, erlang:length(History)),
            Result = #{
                events => [event(2)]
            },
            Self ! 3,
            {ok, Result}
    end,
    mock_processor(TestCase, MockProcessor);
%%
mock_processor(call_unset_timer_test = TestCase) ->
    Self = self(),
    MockProcessor = fun
        ({init, <<"init_args">>, _Process}, _Opts, _Ctx) ->
            Result = #{
                events => [event(1)],
                action => #{set_timer => erlang:system_time(second) + 2}
            },
            Self ! 1,
            {ok, Result};
        ({call, <<"call_args">>, _Process}, _Opts, _Ctx) ->
            %% call while the process is suspended (waiting for its timeout)
            Result = #{
                response => <<"response">>,
                events => [],
                action => unset_timer
            },
            Self ! 2,
            {ok, Result};
        ({timeout, <<>>, #{history := History} = _Process}, _Opts, _Ctx) ->
            %% timeout after call processing (must not happen!)
            ?assertEqual(2, erlang:length(History)),
            Result = #{
                events => [event(3)]
            },
            Self ! 3,
            {ok, Result}
    end,
    mock_processor(TestCase, MockProcessor);
%%
mock_processor(postponed_call_test = TestCase) ->
    Self = self(),
    MockProcessor = fun
        ({init, <<"init_args">>, _Process}, _Opts, _Ctx) ->
            Result = #{
                events => [],
                action => #{set_timer => erlang:system_time(second)}
            },
            Self ! 1,
            {ok, Result};
        ({timeout, <<>>, #{history := []} = _Process}, _Opts, _Ctx) ->
            timer:sleep(3000),
            Result = #{
                events => [event(1)],
                action => #{set_timer => erlang:system_time(second)}
            },
            Self ! 2,
            {ok, Result};
        ({call, <<"call_args">>, #{history := History} = _Process}, _Opts, _Ctx) ->
            ?assertEqual(1, erlang:length(History)),
            Result = #{
                response => <<"response">>,
                events => [event(2)]
            },
            Self ! 3,
            {ok, Result};
        ({timeout, <<>>, #{history := History} = _Process}, _Opts, _Ctx) ->
            ?assertEqual(2, erlang:length(History)),
            Result = #{
                events => [event(3)]
            },
            Self ! 4,
            {ok, Result}
    end,
    mock_processor(TestCase, MockProcessor);
%%
mock_processor(postponed_call_to_suspended_process_test = TestCase) ->
    Self = self(),
    MockProcessor = fun
        ({init, <<"init_args">>, _Process}, _Opts, _Ctx) ->
            Result = #{
                events => [],
                action => #{set_timer => erlang:system_time(second)}
            },
            Self ! 1,
            {ok, Result};
        ({timeout, <<>>, #{history := []} = _Process}, _Opts, _Ctx) ->
            timer:sleep(3000),
            Result = #{
                events => [event(1)]
            },
            Self ! 2,
            {ok, Result};
        ({call, <<"call_args">>, #{history := History} = _Process}, _Opts, _Ctx) ->
            ?assertEqual(1, erlang:length(History)),
            Result = #{
                response => <<"response">>,
                events => [event(2)]
            },
            Self ! 3,
            {ok, Result}
    end,
    mock_processor(TestCase, MockProcessor);
%%
mock_processor(multiple_calls_test = TestCase) ->
    Self = self(),
    MockProcessor = fun
        ({init, <<"init_args">>, _Process}, _Opts, _Ctx) ->
            Result = #{
                events => []
            },
            Self ! 1,
            {ok, Result};
        ({call, <<N>>, _Process}, _Opts, _Ctx) ->
            timer:sleep(100),
            Result = #{
                response => <<"response">>,
                events => [event(N)]
            },
            Self ! iterate,
            {ok, Result}
    end,
    mock_processor(TestCase, MockProcessor);
%%
mock_processor(repair_after_non_retriable_error_test = TestCase) ->
    Self = self(),
    MockProcessor = fun
        ({init, <<"init_args">>, _Process}, _Opts, _Ctx) ->
            Result = #{
                events => [],
                action => #{set_timer => erlang:system_time(second)}
            },
            Self ! 1,
            {ok, Result};
        ({timeout, <<>>, #{history := []} = _Process}, _Opts, _Ctx) ->
            Self ! 2,
            {error, do_not_retry};
        ({repair, <<"repair_args">>, #{history := []} = _Process}, _Opts, _Ctx) ->
            Result = #{
                events => [event(1)]
            },
            Self ! 3,
            {ok, Result};
        ({timeout, <<>>, #{history := History} = _Process}, _Opts, _Ctx) ->
            ?assertEqual(1, erlang:length(History)),
            Result = #{
                events => [event(2)]
            },
            Self ! 4,
            {ok, Result}
    end,
    mock_processor(TestCase, MockProcessor);
%%
mock_processor(error_after_max_retries_test = TestCase) ->
    Self = self(),
    MockProcessor = fun
        ({init, <<"init_args">>, _Process}, _Opts, _Ctx) ->
            Result = #{
                events => [],
                action => #{set_timer => erlang:system_time(second)}
            },
            Self ! 1,
            {ok, Result};
        ({timeout, <<>>, #{history := []} = _Process}, _Opts, _Ctx) ->
            %% must be 3 attempts
            Self ! iterate,
            {error, retry_this}
    end,
    mock_processor(TestCase, MockProcessor);
%%
mock_processor(repair_after_call_error_test = TestCase) ->
    Self = self(),
    MockProcessor = fun
        ({init, <<"init_args">>, _Process}, _Opts, _Ctx) ->
            Result = #{
                metadata => #{<<"k">> => <<"v">>},
                events => []
            },
            Self ! 1,
            {ok, Result};
        ({call, <<"call_args">>, #{history := []} = _Process}, _Opts, _Ctx) ->
            Self ! 2,
            %% a retriable error from a call is not retried: the process goes to error status
            {error, retry_this};
        ({repair, <<"bad_repair_args">>, #{history := []} = _Process}, _Opts, _Ctx) ->
            %% a repair error should not rewrite the process detail
            Self ! 3,
            {error, <<"repair_error">>};
        ({repair, <<"repair_args">>, #{history := []} = _Process}, _Opts, _Ctx) ->
            Result = #{
                metadata => #{<<"k2">> => <<"v2">>},
                events => [event(1)]
            },
            Self ! 4,
            {ok, Result}
    end,
    mock_processor(TestCase, MockProcessor);
%%
mock_processor(remove_by_timer_test = TestCase) ->
    MockProcessor = fun
        ({init, <<"init_args">>, _Process}, _Opts, _Ctx) ->
            Result = #{
                events => [event(1), event(2)],
                action => #{set_timer => erlang:system_time(second) + 2, remove => true}
            },
            {ok, Result}
    end,
    mock_processor(TestCase, MockProcessor);
%%
mock_processor(remove_without_timer_test = TestCase) ->
    Self = self(),
    MockProcessor = fun
        ({init, <<"init_args">>, _Process}, _Opts, _Ctx) ->
            Result = #{
                events => [event(1)],
                action => #{set_timer => erlang:system_time(second) + 2}
            },
            Self ! 1,
            {ok, Result};
        ({timeout, <<>>, _Process}, _Opts, _Ctx) ->
            Result = #{
                events => [],
                action => #{remove => true}
            },
            Self ! 2,
            {ok, Result}
    end,
    mock_processor(TestCase, MockProcessor).

mock_processor(_TestCase, MockFun) ->
    meck:new(prg_ct_processor),
    meck:expect(prg_ct_processor, process, MockFun).

unmock_processor() ->
    meck:unload(prg_ct_processor).

expect_steps_counter(ExpectedSteps) ->
    expect_steps_counter(ExpectedSteps, 0).

expect_steps_counter(ExpectedSteps, CurrentStep) ->
    receive
        iterate when CurrentStep + 1 =:= ExpectedSteps ->
            %% wait for storage
            timer:sleep(50),
            ExpectedSteps;
        iterate ->
            expect_steps_counter(ExpectedSteps, CurrentStep + 1);
        Counter when Counter =:= ExpectedSteps ->
            %% wait for storage
            timer:sleep(50),
            Counter;
        Counter ->
            expect_steps_counter(ExpectedSteps, Counter)
    after 5000 ->
        %% give up after process_step_timeout / 2 (5000 ms)
        CurrentStep
    end.
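
%% NOTE (illustrative): the mocked processor reports progress by sending either
%% its step number or the atom `iterate` to the test process; expect_steps_counter/2
%% counts those messages and returns how many steps actually happened, so tests
%% can assert both "all N steps ran" and "only M of N steps ran".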

event(Id) ->
    #{
        event_id => Id,
        timestamp => erlang:system_time(second),
        metadata => #{<<"format_version">> => 1},
        %% msg_pack compatibility for kafka
        payload => erlang:term_to_binary({bin, crypto:strong_rand_bytes(8)})
    }.

gen_id() ->
    base64:encode(crypto:strong_rand_bytes(8)).
126
test/prg_ct_hook.erl
Normal file
@ -0,0 +1,126 @@
-module(prg_ct_hook).

%% API
-export([init/2, terminate/1, pre_init_per_suite/3]).

-define(LIFECYCLE_TOPIC, <<"default_lifecycle_topic">>).
-define(EVENTSINK_TOPIC, <<"default_topic">>).
-define(BROKERS, [{"kafka1", 9092}, {"kafka2", 9092}, {"kafka3", 9092}]).

init(_Id, State) ->
    _ = start_applications(),
    _ = create_kafka_topics(),
    State.

pre_init_per_suite(_SuiteName, Config, State) ->
    {Config ++ State, State}.

terminate(_State) -> ok.

%% Internal functions

start_applications() ->
    lists:foreach(fun(App) ->
        application:load(App),
        lists:foreach(fun({K, V}) -> ok = application:set_env(App, K, V) end, app_env(App)),
        {ok, _} = application:ensure_all_started(App)
    end, app_list()).

app_list() ->
    %% in order of launch
    [
        epg_connector,
        brod,
        progressor
    ].

app_env(progressor) ->
    [
        {defaults, #{
            storage => #{
                client => prg_pg_backend,
                options => #{
                    pool => default_pool
                }
            },
            retry_policy => #{
                initial_timeout => 1, %% seconds
                backoff_coefficient => 1.0,
                max_timeout => 180, %% seconds
                max_attempts => 3,
                non_retryable_errors => [
                    do_not_retry,
                    some_reason,
                    any_term,
                    <<"Error message">>,
                    {temporary, unavilable}
                ]
            },
            task_scan_timeout => 1, %% seconds
            worker_pool_size => 10,
            process_step_timeout => 10 %% seconds
        }},
        {namespaces, #{
            'default/default' => #{
                processor => #{
                    client => prg_ct_processor,
                    options => #{}
                },
                notifier => #{
                    client => default_kafka_client,
                    options => #{
                        topic => ?EVENTSINK_TOPIC,
                        lifecycle_topic => ?LIFECYCLE_TOPIC
                    }
                }
            }
        }}
    ];
app_env(epg_connector) ->
    [
        {databases, #{
            progressor_db => #{
                host => "postgres",
                port => 5432,
                database => "progressor_db",
                username => "progressor",
                password => "progressor"
            }
        }},
        {pools, #{
            default_pool => #{
                database => progressor_db,
                size => 10
            }
        }}
    ];
app_env(brod) ->
    [
        {clients, [
            {default_kafka_client, [
                {endpoints, ?BROKERS},
                {auto_start_producers, true},
                {default_producer_config, []}
            ]}
        ]}
    ].
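
%% NOTE (assumption): per-namespace options appear to be merged over the `defaults`
%% map via prg_utils:make_ns_opts/2, so the retry_policy above governs the
%% 'default/default' namespace used by these suites.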

create_kafka_topics() ->
    TopicConfig = [
        #{
            configs => [],
            num_partitions => 1,
            assignments => [],
            replication_factor => 1,
            name => ?EVENTSINK_TOPIC
        },
        #{
            configs => [],
            num_partitions => 1,
            assignments => [],
            replication_factor => 1,
            name => ?LIFECYCLE_TOPIC
        }
    ],
    _ = brod:create_topics(?BROKERS, TopicConfig, #{timeout => 5000}).
7
test/prg_ct_processor.erl
Normal file
@ -0,0 +1,7 @@
-module(prg_ct_processor).

-export([process/3]).

-spec process(_Req, _Opts, _Ctx) -> _.
process(_Req, _Opts, _Ctx) ->
    {ok, #{events => []}}.