From f05f5501423ef34298aeb5d03f6757127914711a Mon Sep 17 00:00:00 2001 From: Luca Versari Date: Sat, 4 Apr 2026 17:37:57 +0200 Subject: [PATCH] Add a new "interactive" task type. --- cms/grading/ParameterTypes.py | 37 +++ cms/grading/Sandbox.py | 48 +-- cms/grading/steps/evaluation.py | 20 +- cms/grading/tasktypes/Interactive.py | 291 ++++++++++++++++++ cms/grading/tasktypes/interactive_keeper.py | 278 +++++++++++++++++ .../contest/submission/file_matching.py | 4 +- cmscontrib/loaders/italy_yaml.py | 190 +++++++----- cmstestsuite/Tests.py | 62 ++++ cmstestsuite/code/interactive-correct.cpp | 11 + cmstestsuite/code/interactive-correct.py | 27 ++ cmstestsuite/code/interactive-crash.cpp | 5 + .../code/interactive-many-correct.cpp | 8 + cmstestsuite/code/interactive-many-crash.cpp | 4 + .../code/interactive-many-early-wa.cpp | 9 + cmstestsuite/code/interactive-many-sleep.cpp | 7 + cmstestsuite/code/interactive-many-tle.cpp | 5 + cmstestsuite/code/interactive-many-wrong.cpp | 14 + cmstestsuite/code/interactive-sleep.cpp | 7 + cmstestsuite/code/interactive-tle.cpp | 5 + .../code/interactive-wrong-protocol.cpp | 9 + cmstestsuite/code/interactive-wrong.cpp | 10 + cmstestsuite/tasks/interactive/__init__.py | 48 +++ .../tasks/interactive/code/controller | 67 ++++ cmstestsuite/tasks/interactive/code/stub.cpp | 21 ++ cmstestsuite/tasks/interactive/code/stub.py | 21 ++ .../tasks/interactive/data/input0.out | 0 .../tasks/interactive/data/input0.txt | 1 + .../tasks/interactive/data/input1.out | 0 .../tasks/interactive/data/input1.txt | 1 + .../tasks/interactive/data/input2.out | 0 .../tasks/interactive/data/input2.txt | 1 + .../tasks/interactive_many/__init__.py | 44 +++ .../tasks/interactive_many/code/controller | 59 ++++ .../tasks/interactive_many/data/input0.out | 0 .../tasks/interactive_many/data/input0.txt | 1 + docs/Task types.rst | 43 ++- setup.py | 5 + 37 files changed, 1251 insertions(+), 112 deletions(-) create mode 100644 cms/grading/tasktypes/Interactive.py create 
mode 100644 cms/grading/tasktypes/interactive_keeper.py create mode 100644 cmstestsuite/code/interactive-correct.cpp create mode 100644 cmstestsuite/code/interactive-correct.py create mode 100644 cmstestsuite/code/interactive-crash.cpp create mode 100644 cmstestsuite/code/interactive-many-correct.cpp create mode 100644 cmstestsuite/code/interactive-many-crash.cpp create mode 100644 cmstestsuite/code/interactive-many-early-wa.cpp create mode 100644 cmstestsuite/code/interactive-many-sleep.cpp create mode 100644 cmstestsuite/code/interactive-many-tle.cpp create mode 100644 cmstestsuite/code/interactive-many-wrong.cpp create mode 100644 cmstestsuite/code/interactive-sleep.cpp create mode 100644 cmstestsuite/code/interactive-tle.cpp create mode 100644 cmstestsuite/code/interactive-wrong-protocol.cpp create mode 100644 cmstestsuite/code/interactive-wrong.cpp create mode 100644 cmstestsuite/tasks/interactive/__init__.py create mode 100755 cmstestsuite/tasks/interactive/code/controller create mode 100644 cmstestsuite/tasks/interactive/code/stub.cpp create mode 100644 cmstestsuite/tasks/interactive/code/stub.py create mode 100644 cmstestsuite/tasks/interactive/data/input0.out create mode 100644 cmstestsuite/tasks/interactive/data/input0.txt create mode 100644 cmstestsuite/tasks/interactive/data/input1.out create mode 100644 cmstestsuite/tasks/interactive/data/input1.txt create mode 100644 cmstestsuite/tasks/interactive/data/input2.out create mode 100644 cmstestsuite/tasks/interactive/data/input2.txt create mode 100644 cmstestsuite/tasks/interactive_many/__init__.py create mode 100755 cmstestsuite/tasks/interactive_many/code/controller create mode 100644 cmstestsuite/tasks/interactive_many/data/input0.out create mode 100644 cmstestsuite/tasks/interactive_many/data/input0.txt diff --git a/cms/grading/ParameterTypes.py b/cms/grading/ParameterTypes.py index 00d3c08840..136de4de9e 100644 --- a/cms/grading/ParameterTypes.py +++ b/cms/grading/ParameterTypes.py @@ -134,6 +134,26 
@@ def parse_string(self, value): return value +class ParameterTypeBool(ParameterType): + """Type for a boolean parameter.""" + + TEMPLATE = GLOBAL_ENVIRONMENT.from_string(""" + +""") + + def validate(self, value): + if not isinstance(value, bool): + raise ValueError("Invalid value for bool parameter %s" % self.name) + + def parse_handler(self, handler, prefix): + return handler.get_argument(prefix + self.short_name, None) is not None + + def parse_string(self, value): + return value.lower() == "true" + + class ParameterTypeInt(ParameterType): """Type for an integer parameter.""" @@ -151,6 +171,23 @@ def parse_string(self, value): return int(value) +class ParameterTypeFloat(ParameterType): + """Type for a float parameter.""" + + TEMPLATE = GLOBAL_ENVIRONMENT.from_string(""" + +""") + + def validate(self, value): + if not isinstance(value, float) and not isinstance(value, int): + raise ValueError("Invalid value for float parameter %s" % self.name) + + def parse_string(self, value): + return float(value) + + class ParameterTypeChoice(ParameterType): """Type for a parameter giving a choice among a finite number of items.""" diff --git a/cms/grading/Sandbox.py b/cms/grading/Sandbox.py index 5444d8fe85..92f7232c7e 100644 --- a/cms/grading/Sandbox.py +++ b/cms/grading/Sandbox.py @@ -241,10 +241,10 @@ def __init__( # we need to ensure that they can read and write to the directory. # But we don't want everybody on the system to, which is why the # outer directory exists with no read permissions. 
- self._outer_dir = tempfile.mkdtemp( + self._outer_dir: str = tempfile.mkdtemp( dir=self.temp_dir, prefix="cms-%s-" % (self.name) ) - self._home = os.path.join(self._outer_dir, "home") + self._home: str = os.path.join(self._outer_dir, "home") self._home_dest = "/tmp" os.mkdir(self._home) @@ -266,15 +266,16 @@ def __init__( self.inherit_env: list[str] = [] # -E self.set_env: dict[str, str] = {} # -E self.fsize: int | None = None # -f - self.stdin_file: str | None = None # -i - self.stdout_file: str | None = None # -o - self.stderr_file: str | None = None # -r + self.stdin_file: str | int | None = None # -i + self.stdout_file: str | int | None = None # -o + self.stderr_file: str | int | None = None # -r self.stack_space: int | None = None # -k self.address_space: int | None = None # -m self.timeout: float | None = None # -t self.verbosity: int = 0 # -v self.wallclock_timeout: float | None = None # -w self.extra_timeout: float | None = None # -x + self.close_fds = True self.max_processes: int = 1 @@ -656,13 +657,15 @@ def execute_without_std( return the Popen object from subprocess. """ - popen = self._popen( - command, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - close_fds=True, + stdin = self.stdin_file if isinstance(self.stdin_file, int) else subprocess.PIPE + stdout = ( + self.stdout_file if isinstance(self.stdout_file, int) else subprocess.PIPE ) + stderr = ( + self.stderr_file if isinstance(self.stderr_file, int) else subprocess.PIPE + ) + + popen = self._popen(command, stdin=stdin, stdout=stdout, stderr=stderr) # If the caller wants us to wait for completion, we also avoid # std*** to interfere with command. Otherwise we let the @@ -730,12 +733,13 @@ def cleanup(self, delete: bool = False): self._home_dest, ], stdout=subprocess.DEVNULL, - stderr=subprocess.STDOUT, + stderr=subprocess.DEVNULL, ) # Tell isolate to cleanup the sandbox. 
subprocess.check_call( - exe + ["--cleanup"], stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT + exe + ["--cleanup"], + stdout=subprocess.DEVNULL, ) if delete: @@ -877,7 +881,7 @@ def build_box_options(self) -> list[str]: if self.fsize is not None: # Isolate wants file size as KiB. res += ["--fsize=%d" % (self.fsize // 1024)] - if self.stdin_file is not None: + if isinstance(self.stdin_file, str): res += ["--stdin=%s" % self.inner_absolute_path(self.stdin_file)] if self.stack_space is not None: # Isolate wants stack size as KiB. @@ -885,13 +889,13 @@ def build_box_options(self) -> list[str]: if self.address_space is not None: # Isolate wants memory size as KiB. res += ["--cg-mem=%d" % (self.address_space // 1024)] - if self.stdout_file is not None: + if isinstance(self.stdout_file, str): res += ["--stdout=%s" % self.inner_absolute_path(self.stdout_file)] if self.max_processes is not None: res += ["--processes=%d" % self.max_processes] else: res += ["--processes"] - if self.stderr_file is not None: + if isinstance(self.stderr_file, str): res += ["--stderr=%s" % self.inner_absolute_path(self.stderr_file)] if self.timeout is not None: res += ["--time=%g" % self.timeout] @@ -900,6 +904,8 @@ def build_box_options(self) -> list[str]: res += ["--wall-time=%g" % self.wallclock_timeout] if self.extra_timeout is not None: res += ["--extra-time=%g" % self.extra_timeout] + if not self.close_fds: + res += ["--inherit-fds", "--open-files=0"] res += ["--meta=%s" % ("%s.%d" % (self.info_basename, self.exec_num))] res += ["--run"] return res @@ -957,7 +963,6 @@ def _popen( stdin: int | None = None, stdout: int | None = None, stderr: int | None = None, - close_fds: bool = True, ) -> subprocess.Popen: """Execute the given command in the sandbox using subprocess.Popen, assigning the corresponding standard file @@ -967,7 +972,6 @@ def _popen( stdin: a file descriptor. stdout: a file descriptor. stderr: a file descriptor. - close_fds: close all file descriptor before executing. 
return: popen object. @@ -988,7 +992,11 @@ def _popen( os.chmod(self._home, prev_permissions) try: p = subprocess.Popen( - args, stdin=stdin, stdout=stdout, stderr=stderr, close_fds=close_fds + args, + stdin=stdin, + stdout=stdout, + stderr=stderr, + close_fds=self.close_fds, ) except OSError: logger.critical( @@ -1004,6 +1012,6 @@ def initialize_isolate(self): """Initialize isolate's box.""" init_cmd = ["isolate", "--box-id=%d" % self.box_id, "--cg", "--init"] try: - subprocess.check_call(init_cmd) + subprocess.check_call(init_cmd, stdout=subprocess.DEVNULL) except subprocess.CalledProcessError as e: raise SandboxInterfaceException("Failed to initialize sandbox") from e diff --git a/cms/grading/steps/evaluation.py b/cms/grading/steps/evaluation.py index adbe507b32..2a26b6a075 100644 --- a/cms/grading/steps/evaluation.py +++ b/cms/grading/steps/evaluation.py @@ -136,8 +136,8 @@ def evaluation_step( for command in commands: success = evaluation_step_before_run( sandbox, command, time_limit, memory_limit, - dirs_map, writable_files, stdin_redirect, stdout_redirect, - multiprocess, wait=True) + None, dirs_map, writable_files, stdin_redirect, + stdout_redirect, multiprocess, wait=True) if not success: logger.debug("Job failed in evaluation_step_before_run.") return False, None, None @@ -154,11 +154,13 @@ def evaluation_step_before_run( command: list[str], time_limit: float | None = None, memory_limit: int | None = None, + wall_limit: float | None = None, dirs_map: dict[str, tuple[str | None, str | None]] | None = None, writable_files: list[str] | None = None, - stdin_redirect: str | None = None, - stdout_redirect: str | None = None, + stdin_redirect: str | int | None = None, + stdout_redirect: str | int | None = "stdout.txt", multiprocess: bool = False, + close_fds: bool = True, wait: bool = False, ) -> bool | subprocess.Popen: """First part of an evaluation step, up to the execution, included. 
@@ -175,6 +177,8 @@ def evaluation_step_before_run( # Ensure parameters are appropriate. if time_limit is not None and time_limit <= 0: raise ValueError("Time limit must be positive, is %s" % time_limit) + if wall_limit is not None and wall_limit <= 0: + raise ValueError("Wall limit must be positive, is %s" % wall_limit) if memory_limit is not None and memory_limit <= 0: raise ValueError( "Memory limit must be positive, is %s" % memory_limit) @@ -184,8 +188,6 @@ def evaluation_step_before_run( dirs_map = {} if writable_files is None: writable_files = [] - if stdout_redirect is None: - stdout_redirect = "stdout.txt" # Set sandbox parameters suitable for evaluation. if time_limit is not None: @@ -195,6 +197,9 @@ def evaluation_step_before_run( sandbox.timeout = None sandbox.wallclock_timeout = None + if wall_limit is not None: + sandbox.wallclock_timeout = wall_limit + if memory_limit is not None: sandbox.address_space = memory_limit else: @@ -210,11 +215,12 @@ def evaluation_step_before_run( for src, (dest, options) in dirs_map.items(): sandbox.add_mapped_directory(src, dest=dest, options=options) for name in [sandbox.stderr_file, sandbox.stdout_file]: - if name is not None: + if isinstance(name, str): writable_files.append(name) sandbox.allow_writing_only(writable_files) sandbox.set_multiprocess(multiprocess) + sandbox.close_fds = close_fds # Actually run the evaluation command. 
logger.debug("Starting execution step.") diff --git a/cms/grading/tasktypes/Interactive.py b/cms/grading/tasktypes/Interactive.py new file mode 100644 index 0000000000..9f3d8b1498 --- /dev/null +++ b/cms/grading/tasktypes/Interactive.py @@ -0,0 +1,291 @@ +#!/usr/bin/env python3 +import tempfile + +import json +import logging +import os +import sys +import subprocess + +from cms import config +from cms.db import Executable +from cms.grading.ParameterTypes import ( + ParameterTypeChoice, + ParameterTypeInt, + ParameterTypeFloat, + ParameterTypeBool, +) +from cms.grading.languagemanager import get_language, LANGUAGES +from cms.grading.steps import compilation_step +from .abc import TaskType +from .util import ( + check_executables_number, + check_manager_present, + create_sandbox, + delete_sandbox, + is_manager_for_compilation, +) + +logger = logging.getLogger(__name__) + + +class Interactive(TaskType): + """Task type class for interactive tasks where a controller dynamically + spawns solution instances. + """ + + CONTROLLER_FILENAME = "controller" + COMPILATION_ALONE = "alone" + COMPILATION_STUB = "stub" + STUB_BASENAME = "stub" + + _COMPILATION = ParameterTypeChoice( + "Compilation", + "compilation", + "", + { + COMPILATION_ALONE: "Submissions are self-sufficient", + COMPILATION_STUB: "Submissions are compiled with a stub", + }, + ) + + _PROCESS_LIMIT = ParameterTypeInt( + "Process limit", + "process_limit", + "Maximum number of solution instances the controller can spawn." 
+ " Resource usage is proportional to this limit.", + ) + + _CONCURRENT = ParameterTypeBool( + "Concurrent solutions", + "concurrent", + "Whether solutions are assumed to be run concurrently or not", + ) + + _CONTROLLER_MEMORY_LIMIT = ParameterTypeFloat( + "Controller memory limit (MB)", + "controller_memory_limit", + "Maximum memory (in MB) that the controller can use", + ) + + _CONTROLLER_TIME_LIMIT = ParameterTypeFloat( + "Controller time limit (s)", + "controller_time_limit", + "Maximum CPU time (in seconds) that the controller can use", + ) + + _CONTROLLER_WALL_LIMIT = ParameterTypeFloat( + "Controller wall time limit (s)", + "controller_wall_limit", + "Maximum wall time (in seconds) that the controller can use", + ) + + ACCEPTED_PARAMETERS = [ + _PROCESS_LIMIT, + _COMPILATION, + _CONCURRENT, + _CONTROLLER_MEMORY_LIMIT, + _CONTROLLER_TIME_LIMIT, + _CONTROLLER_WALL_LIMIT, + ] + + def __init__(self, parameters): + super().__init__(parameters) + self.process_limit = self.parameters[0] + self.compilation_type = self.parameters[1] + self.concurrent = self.parameters[2] + # Note: Sandbox wants the memory limit in *bytes*! 
+ self.controller_memory_limit = self.parameters[3] * 2**20 + self.controller_time_limit = self.parameters[4] + self.controller_wall_limit = self.parameters[5] + + def get_compilation_commands(self, submission_format): + """See TaskType.get_compilation_commands.""" + codenames_to_compile = [] + if self._uses_stub(): + codenames_to_compile.append(self.STUB_BASENAME + ".%l") + codenames_to_compile.extend([x for x in submission_format if x.endswith(".%l")]) + res = dict() + for language in LANGUAGES: + source_ext = language.source_extension + executable_filename = self._executable_filename(submission_format, language) + res[language.name] = language.get_compilation_commands( + [ + codename.replace(".%l", source_ext) + for codename in codenames_to_compile + ], + executable_filename, + ) + return res + + def get_user_managers(self): + """See TaskType.get_user_managers.""" + if self._uses_stub(): + return [self.STUB_BASENAME + ".%l"] + else: + return [] + + def get_auto_managers(self): + """See TaskType.get_auto_managers.""" + return [] + + def _uses_stub(self) -> bool: + return self.compilation_type == self.COMPILATION_STUB + + def compile(self, job, file_cacher): + """See TaskType.compile.""" + if not check_executables_number(job, 0): + return + + language = get_language(job.language) + + source_ext = language.source_extension + filenames_to_compile = [] + filenames_and_digests_to_get = {} + + # Grader (if needed). + if self._uses_stub(): + grader_filename = self.STUB_BASENAME + source_ext + if not check_manager_present(job, grader_filename): + return + filenames_to_compile.append(grader_filename) + filenames_and_digests_to_get[grader_filename] = job.managers[ + grader_filename + ].digest + + # User's submitted file(s). + for codename, file_ in job.files.items(): + filename = codename.replace(".%l", source_ext) + filenames_to_compile.append(filename) + filenames_and_digests_to_get[filename] = file_.digest + + # Any other useful manager (just copy). 
+ for filename, manager in job.managers.items(): + if is_manager_for_compilation(filename, language): + filenames_and_digests_to_get[filename] = manager.digest + + executable_filename = self._executable_filename(job.files.keys(), language) + commands = language.get_compilation_commands( + filenames_to_compile, executable_filename + ) + + sandbox = create_sandbox(0, file_cacher, name="compile") + job.sandboxes.append(sandbox.get_root_path()) + + for filename, digest in filenames_and_digests_to_get.items(): + sandbox.create_file_from_storage(filename, digest, file_cacher) + + box_success, compilation_success, text, stats = compilation_step( + sandbox, commands + ) + + job.success = box_success + job.compilation_success = compilation_success + job.text = text + job.plus = stats + if box_success and compilation_success: + digest = sandbox.get_file_to_storage( + executable_filename, + file_cacher, + "Executable %s for %s" % (executable_filename, job.info), + ) + job.executables[executable_filename] = Executable( + executable_filename, digest + ) + + delete_sandbox(sandbox, job, file_cacher) + + def evaluate(self, job, file_cacher): + """See TaskType.evaluate.""" + if not check_executables_number(job, 1): + return + executable_filename = next(iter(job.executables.keys())) + executable_digest = job.executables[executable_filename].digest + + if not check_manager_present(job, self.CONTROLLER_FILENAME): + return + controller_digest = job.managers[self.CONTROLLER_FILENAME].digest + + language = get_language(job.language) + controller_command = ["./%s" % self.CONTROLLER_FILENAME] + solution_commands = language.get_evaluation_commands(executable_filename) + + # We need absolute paths for the keeper. 
+ with tempfile.TemporaryDirectory( + dir=config.global_.temp_dir, prefix="interactive" + ) as tempdir: + with open(os.path.join(tempdir, self.CONTROLLER_FILENAME), "wb") as f: + file_cacher.get_file_to_fobj(controller_digest, f) + + with open(os.path.join(tempdir, executable_filename), "wb") as f: + file_cacher.get_file_to_fobj(executable_digest, f) + + with open(os.path.join(tempdir, "input.txt"), "wb") as f: + file_cacher.get_file_to_fobj(job.input, f) + + keeper_config = { + "controller_command": controller_command, + "solution_commands": solution_commands, + "controller_files": [ + self.CONTROLLER_FILENAME, + "input.txt", + ], + "solution_files": [executable_filename], + "controller_wall_limit": self.controller_wall_limit, + "controller_time_limit": self.controller_time_limit, + "controller_memory_limit": self.controller_memory_limit, + "solution_time_limit": job.time_limit, + "solution_memory_limit": job.memory_limit, + "process_limit": self.process_limit, + "concurrent": self.concurrent, + "temp_dir": tempdir, + "shard": file_cacher.service.shard if file_cacher.service else None, + "delete_sandbox": not (job.keep_sandbox or job.archive_sandbox), + } + + keeper_path = os.path.join( + os.path.dirname(__file__), "interactive_keeper.py" + ) + + p = subprocess.Popen( + [sys.executable, keeper_path, json.dumps(keeper_config)], + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + ) + stdout, _ = p.communicate(timeout=self.controller_wall_limit * 2) + + KEEPER_ERROR_MESSAGE = "Internal error in interactive keeper" + + if p.returncode != 0: + logger.error("Keeper failed with return code %d", p.returncode) + logger.error( + "Keeper output: %s", stdout.decode("utf-8", errors="replace") + ) + job.success = False + job.text = [KEEPER_ERROR_MESSAGE] + return + + try: + stdout_str = stdout.decode("utf-8") + result = json.loads(stdout_str) + logger.info("Parsed keeper result: %s", result) + except ValueError as e: + logger.error( + "Failed to parse keeper output: %s. 
Output: %r", + e, + stdout.decode("utf-8", errors="replace"), + ) + job.success = False + job.text = [KEEPER_ERROR_MESSAGE] + return + + job.success = result["success"] + job.outcome = str(result["outcome"]) + job.text = result["text"] + job.admin_text = result.get("admin_text") + job.plus = result.get("stats", {}) + + def _executable_filename(self, codenames, language): + """Return the filename of the executable.""" + name = "_".join(sorted(codename.replace(".%l", "") for codename in codenames)) + return name + language.executable_extension diff --git a/cms/grading/tasktypes/interactive_keeper.py b/cms/grading/tasktypes/interactive_keeper.py new file mode 100644 index 0000000000..0399cb77c9 --- /dev/null +++ b/cms/grading/tasktypes/interactive_keeper.py @@ -0,0 +1,278 @@ +#!/usr/bin/env python3 + +# Contest Management System - http://cms-dev.github.io/ +# Copyright © 2026 Luca Versari +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ + +import json +import logging +import os +import shutil +import sys +from functools import reduce + +from cms.grading.Sandbox import Sandbox, wait_without_std +from cms.grading.steps.evaluation import ( + evaluation_step_before_run, + evaluation_step_after_run, + human_evaluation_message, +) +from cms.grading.steps.stats import merge_execution_stats +from cms.grading.steps import trusted_step + +# Note: we keep a separate interactive keeper (running in a separate +# process) to avoid opening many file descriptors in the main worker. +# This both makes cleanup easier, and avoids potential deadlocks if +# there ever happen to be multiple threads in the worker and one of +# those threads fork()s at the wrong moment. + +# Configure logging to stderr for critical errors only +logger = logging.getLogger("interactive_keeper") + + +def get_controller_text(sandbox): + score = None + text = [] + admin_text = None + with sandbox.get_file_text("stderr.txt") as f: + for line in f.readlines(): + line = line.strip() + if line.startswith("SCORE: "): + assert score is None + score = float(line[len("SCORE: ") :].strip()) + elif line.startswith("USER_MESSAGE: "): + assert not text + text = [line[len("USER_MESSAGE: ") :].strip()] + elif line.startswith("ADMIN_MESSAGE: "): + assert admin_text is None + admin_text = line[len("ADMIN_MESSAGE: ") :].strip() + else: + raise ValueError(f"Unknown controller feedback command {line!r}") + + return score, text, admin_text + + +def main(): + config = json.loads(sys.argv[1]) + + controller_command = config["controller_command"] + solution_commands = config["solution_commands"] + controller_files = config["controller_files"] + solution_files = config["solution_files"] + controller_wall_limit = config.get("controller_wall_limit") + controller_time_limit = config.get("controller_time_limit") + controller_memory_limit = config.get("controller_memory_limit") + solution_time_limit = config.get("solution_time_limit") + solution_memory_limit = 
config.get("solution_memory_limit") + process_limit = config.get("process_limit") + concurrent = config.get("concurrent") + temp_dir = config.get("temp_dir") + shard = config.get("shard") + delete_sandbox = config.get("delete_sandbox") + + pipes = [] + for i in range(process_limit): + c_to_u_r, c_to_u_w = os.pipe() + u_to_c_r, u_to_c_w = os.pipe() + os.set_inheritable(c_to_u_r, True) + os.set_inheritable(c_to_u_w, True) + os.set_inheritable(u_to_c_r, True) + os.set_inheritable(u_to_c_w, True) + pipes.append({"c_to_u": (c_to_u_r, c_to_u_w), "u_to_c": (u_to_c_r, u_to_c_w)}) + + controller_sandbox = Sandbox(0, shard, name="controller", temp_dir=temp_dir) + for path in controller_files: + with controller_sandbox.create_file(path, executable=True) as f: + with open(os.path.join(temp_dir, path), "rb") as g: + shutil.copyfileobj(g, f) + + controller_proc = evaluation_step_before_run( + controller_sandbox, + controller_command, + time_limit=controller_time_limit, + memory_limit=controller_memory_limit, + wall_limit=controller_wall_limit, + stdin_redirect=None, + stdout_redirect=None, + multiprocess=True, + close_fds=False, + wait=False, + ) + + assert not isinstance(controller_proc, bool) + + for p in pipes: + os.close(p["c_to_u"][1]) + os.close(p["u_to_c"][0]) + + next_process_index = 0 + solution_sandboxes = [] + solution_procs = [] + + while True: + line = controller_proc.stdout.readline() + if not line: + break + line = line.decode("utf-8").strip() + if line == "START_SOLUTION": + if next_process_index >= process_limit: + break + + p = pipes[next_process_index] + sandbox_sol = Sandbox( + 1 + next_process_index, + shard, + name="solution_%d" % next_process_index, + temp_dir=temp_dir, + ) + for path in solution_files: + with sandbox_sol.create_file(path, executable=True) as f: + with open(os.path.join(temp_dir, path), "rb") as g: + shutil.copyfileobj(g, f) + + # Matches Communication's handling of multi-command executions. 
+ if len(solution_commands) > 1: + trusted_step(sandbox_sol, solution_commands[:-1]) + sol_proc = evaluation_step_before_run( + sandbox_sol, + solution_commands[-1], + time_limit=solution_time_limit, + wall_limit=controller_wall_limit, + # the wall-clock limit mostly exists to eventually kill stuck + # solutions. there's no point setting it higher than the + # controller's limit (as then the controller would die before + # any of the solutions), and there's little to be gained from + # setting it any lower. to reduce the amount of unnecessary + # configuration, set it to the same limit as the controller. + memory_limit=solution_memory_limit, + stdin_redirect=p["c_to_u"][0], + stdout_redirect=p["u_to_c"][1], + multiprocess=False, + close_fds=False, + wait=False, + ) + + os.close(p["c_to_u"][0]) + os.close(p["u_to_c"][1]) + + try: + controller_proc.stdin.write( + ("%d %d\n" % (p["c_to_u"][1], p["u_to_c"][0])).encode("utf-8") + ) + controller_proc.stdin.flush() + except BrokenPipeError: + # If the controller dies before we can write back the pipes, + # the keeper should not die. + pass + + solution_sandboxes.append(sandbox_sol) + solution_procs.append(sol_proc) + next_process_index += 1 + else: + result = { + "success": False, + "outcome": 0.0, + "text": "Invalid command from controller: " + line, + "admin_text": None, + "stats": None, + } + print(json.dumps(result), flush=True) + sys.exit(0) + + # Close controller pipes explicitly before result collection + if controller_proc.stdin: + controller_proc.stdin.close() + if controller_proc.stdout: + controller_proc.stdout.close() + + # Wait for all the sandboxes to exit before collecting results. 
+ wait_without_std([controller_proc] + solution_procs) + + success_mgr, evaluation_success_mgr, stats_mgr = evaluation_step_after_run( + controller_sandbox + ) + + print(success_mgr, evaluation_success_mgr, stats_mgr, file=sys.stderr) + + user_results = [evaluation_step_after_run(s) for s in solution_sandboxes] + box_success_user = all(r[0] for r in user_results) + evaluation_success_user = all(r[1] for r in user_results) + + valid_stats = [r[2] for r in user_results if r[2] is not None] + + def do_merge(a, b): + return merge_execution_stats(a, b, concurrent=concurrent) + + if valid_stats: + stats_user = reduce(do_merge, valid_stats) + else: + stats_user = { + "execution_time": 0.0, + "execution_memory": 0, + "execution_wall_clock_time": 0.0, + "exit_status": "ok", + } + + outcome = None + text = None + admin_text = None + success = True + + try: + score, controller_text, admin_text = get_controller_text(controller_sandbox) + except Exception as e: + success = False + text = ["Internal error"] + admin_text = [f"Internal error: {e}"] + + if not success: + pass + elif not (success_mgr and box_success_user): + success = False + else: + if not evaluation_success_user: + outcome = 0.0 + text = human_evaluation_message(stats_user) + if controller_text: + text = ( + [controller_text[0] + f" (may be caused by {text[0]})"] + + controller_text[1:] + + text[1:] + ) + elif not evaluation_success_mgr: + outcome = 0.0 + text = ["Controller failed"] + (controller_text if controller_text else []) + else: + outcome = score if score is not None else 0.0 + text = controller_text + + result = { + "success": success, + "outcome": outcome, + "text": text, + "admin_text": admin_text, + "stats": stats_user, + } + # Communicate results back to the worker + print(json.dumps(result), flush=True) + + controller_sandbox.cleanup(delete=delete_sandbox) + for s in solution_sandboxes: + s.cleanup(delete=delete_sandbox) + + +if __name__ == "__main__": + main() diff --git 
a/cms/server/contest/submission/file_matching.py b/cms/server/contest/submission/file_matching.py index 23bcbfce9b..0ffb92c2f2 100644 --- a/cms/server/contest/submission/file_matching.py +++ b/cms/server/contest/submission/file_matching.py @@ -138,8 +138,8 @@ def _match_file( return codename raise InvalidFiles( - "file %r/%r doesn't unambiguously match the submission format" - % (codename, filename)) + "file %r/%r doesn't unambiguously match the submission format %r" + % (codename, filename, submission_format)) def _match_files( diff --git a/cmscontrib/loaders/italy_yaml.py b/cmscontrib/loaders/italy_yaml.py index 9b923b394d..760a34960a 100644 --- a/cmscontrib/loaders/italy_yaml.py +++ b/cmscontrib/loaders/italy_yaml.py @@ -595,11 +595,16 @@ def get_task(self, get_statement=True) -> Task | None: # presuming that the task type is Batch, we retrieve graders # in the form sol/grader.%l graders = False + stubs = False for lang in LANGUAGES: if os.path.exists(os.path.join( self.path, "sol", "grader%s" % lang.source_extension)): graders = True break + if os.path.exists(os.path.join( + self.path, "sol", "stub%s" % lang.source_extension)): + stubs = True + break if graders: # Read grader for each language for lang in LANGUAGES: @@ -615,6 +620,24 @@ def get_task(self, get_statement=True) -> Task | None: Manager("grader%s" % extension, digest)] else: logger.warning("Grader for language %s not found ", lang) + compilation_param = "grader" + elif stubs: + # Read grader for each language + for lang in LANGUAGES: + extension = lang.source_extension + grader_filename = os.path.join( + self.path, "sol", "stub%s" % extension) + if os.path.exists(grader_filename): + digest = self.file_cacher.put_file_from_path( + grader_filename, + "Stub for task %s and language %s" % + (task.name, lang)) + args["managers"] += [ + Manager("stub%s" % extension, digest)] + else: + logger.warning("Stub for language %s not found ", lang) + compilation_param = "stub" + if graders or stubs: # Read 
managers with other known file extensions for other_filename in os.listdir(os.path.join(self.path, "sol")): if any(other_filename.endswith(header) @@ -624,7 +647,6 @@ def get_task(self, get_statement=True) -> Task | None: "Manager %s for task %s" % (other_filename, task.name)) args["managers"] += [ Manager(other_filename, digest)] - compilation_param = "grader" else: compilation_param = "alone" @@ -655,9 +677,9 @@ def get_task(self, get_statement=True) -> Task | None: else: if "score_type" in conf or "score_type_parameters" in conf: logger.warning("To override score type data, task.yaml must " - "specify all 'score_type', " - "'score_type_parameters' and " - "'n_input'.") + "specify all 'score_type', " + "'score_type_parameters' and " + "'n_input'.") # Detect subtasks by checking GEN gen_filename = os.path.join(self.path, 'gen', 'GEN') @@ -747,88 +769,92 @@ def get_task(self, get_statement=True) -> Task | None: task.submission_format = \ ["output_%03d.txt" % i for i in range(n_input)] + # If there is check/controller (or equivalent), then the task + # type is Interactive + controller_path = None + for path in (os.path.join(self.path, "check", "controller"), + os.path.join(self.path, "cor", "controller")): + if os.path.exists(path): + controller_path = path + break + # If there is check/manager (or equivalent), then the task # type is Communication - else: - paths = [os.path.join(self.path, "check", "manager"), - os.path.join(self.path, "cor", "manager")] - for path in paths: - if os.path.exists(path): - num_processes = load(conf, None, "num_processes") - if num_processes is None: - num_processes = 1 - io_type = load(conf, None, "user_io") - if io_type is not None: - if io_type not in ["std_io", "fifo_io"]: - logger.warning("user_io incorrect. Valid options " - "are 'std_io' and 'fifo_io'. 
" - "Ignored.") - io_type = None - logger.info("Task type Communication") - args["task_type"] = "Communication" - args["task_type_parameters"] = \ - [num_processes, "alone", io_type or "std_io"] - digest = self.file_cacher.put_file_from_path( - path, - "Manager for task %s" % task.name) - args["managers"] += [ - Manager("manager", digest)] - for lang in LANGUAGES: - stub_name = os.path.join( - self.path, "sol", "stub%s" % lang.source_extension) - if os.path.exists(stub_name): - digest = self.file_cacher.put_file_from_path( - stub_name, - "Stub for task %s and language %s" % ( - task.name, lang.name)) - args["task_type_parameters"] = \ - [num_processes, "stub", io_type or "fifo_io"] - args["managers"] += [ - Manager( - "stub%s" % lang.source_extension, digest)] - else: - logger.warning("Stub for language %s not " - "found.", lang.name) - for other_filename in os.listdir(os.path.join(self.path, - "sol")): - if any(other_filename.endswith(header) - for header in HEADER_EXTS): - digest = self.file_cacher.put_file_from_path( - os.path.join(self.path, "sol", other_filename), - "Stub %s for task %s" % (other_filename, - task.name)) - args["managers"] += [ - Manager(other_filename, digest)] - break + manager_path = None + for path in (os.path.join(self.path, "check", "manager"), + os.path.join(self.path, "cor", "manager")): + if os.path.exists(path): + manager_path = path + break + if controller_path is not None and manager_path is not None: + logger.fatal("Cannot have both a manager and a controller") + + if controller_path is not None: + args["task_type"] = "Interactive" + logger.info("Task type Interactive") + + process_limit = conf.get("controller_process_limit", 200) + concurrent = conf.get("interactive_concurrent", True) + controller_memory_limit_mb = conf.get("controller_memory_limit", None) + controller_time_limit = conf.get("controller_time_limit", None) + controller_wall_limit = conf.get("controller_wall_time_limit", None) + + args["task_type_parameters"] = \ + 
[process_limit, compilation_param, concurrent, + controller_memory_limit_mb, controller_time_limit, controller_wall_limit] + digest = self.file_cacher.put_file_from_path( + controller_path, + "Controller for task %s" % task.name) + args["managers"] += [Manager("controller", digest)] + elif manager_path is not None: + num_processes = load(conf, None, "num_processes") + if num_processes is None: + num_processes = 1 + io_type = load(conf, None, "user_io") + if io_type is not None: + if io_type not in ["std_io", "fifo_io"]: + logger.warning("user_io incorrect. Valid options " + "are 'std_io' and 'fifo_io'. " + "Ignored.") + io_type = None + logger.info("Task type Communication") + args["task_type"] = "Communication" + args["task_type_parameters"] = \ + [num_processes, compilation_param, + io_type or ("fifo_io" if compilation_param == "stub" else "std_io")] + digest = self.file_cacher.put_file_from_path( + manager_path, + "Manager for task %s" % task.name) + args["managers"] += [Manager("manager", digest)] + else: # Otherwise, the task type is Batch or BatchAndOutput - else: - args["task_type"] = "Batch" - args["task_type_parameters"] = [ - compilation_param, - [infile_param, outfile_param], - evaluation_param, - ] - - output_only_testcases = load(conf, None, "output_only_testcases", - conv=lambda x: "" if x is None else x) - output_optional_testcases = load(conf, None, "output_optional_testcases", - conv=lambda x: "" if x is None else x) - if len(output_only_testcases) > 0 or len(output_optional_testcases) > 0: - args["task_type"] = "BatchAndOutput" - output_only_codenames = set() - if len(output_only_testcases) > 0: - output_only_codenames = \ - {"%03d" % int(x.strip()) for x in output_only_testcases.split(',')} - args["task_type_parameters"].append(','.join(output_only_codenames)) - else: - args["task_type_parameters"].append("") - output_codenames = set() - if len(output_optional_testcases) > 0: - output_codenames = \ - {"%03d" % int(x.strip()) for x in 
output_optional_testcases.split(',')} - output_codenames.update(output_only_codenames) - task.submission_format.extend(["output_%s.txt" % s for s in sorted(output_codenames)]) + args["task_type"] = "Batch" + args["task_type_parameters"] = [ + compilation_param, + [infile_param, outfile_param], + evaluation_param, + ] + + output_only_testcases = load(conf, None, "output_only_testcases", + conv=lambda x: "" if x is None else x) + output_optional_testcases = load(conf, None, "output_optional_testcases", + conv=lambda x: "" if x is None else x) + if len(output_only_testcases) > 0 or len(output_optional_testcases) > 0: + args["task_type"] = "BatchAndOutput" + output_only_codenames = set() + if len(output_only_testcases) > 0: + output_only_codenames = \ + {"%03d" % int(x.strip()) for x in output_only_testcases.split(',')} + args["task_type_parameters"].append(','.join(output_only_codenames)) + else: + args["task_type_parameters"].append("") + output_codenames = set() + if len(output_optional_testcases) > 0: + output_codenames = \ + {"%03d" % int(x.strip()) for x in output_optional_testcases.split(',')} + output_codenames.update(output_only_codenames) + task.submission_format.extend(["output_%s.txt" % s for s in sorted(output_codenames)]) args["testcases"] = [] for i in range(n_input): diff --git a/cmstestsuite/Tests.py b/cmstestsuite/Tests.py index 03870f6d49..aa12d46af3 100644 --- a/cmstestsuite/Tests.py +++ b/cmstestsuite/Tests.py @@ -33,6 +33,8 @@ import cmstestsuite.tasks.communication_stdio as communication_stdio import cmstestsuite.tasks.communication_stdio_stubbed \ as communication_stdio_stubbed +import cmstestsuite.tasks.interactive as interactive +import cmstestsuite.tasks.interactive_many as interactive_many import cmstestsuite.tasks.outputonly as outputonly import cmstestsuite.tasks.outputonly_comparator as outputonly_comparator import cmstestsuite.tasks.twosteps as twosteps @@ -94,6 +96,66 @@ languages=ALL_LANGUAGES, checks=[CheckOverallScore(100, 100)]), + 
Test('interactive-correct', + task=interactive, filenames=['interactive-correct.%l'], + languages=(LANG_CPP, LANG_CPP14, LANG_CPP17, LANG_CPP20, LANG_PYTHON3, LANG_PYPY3), + checks=[CheckOverallScore(100, 100)]), + + Test('interactive-wrong', + task=interactive, filenames=['interactive-wrong.%l'], + languages=(LANG_CPP,), + checks=[CheckOverallScore(0, 100)]), + + Test('interactive-sleep', + task=interactive, filenames=['interactive-sleep.%l'], + languages=(LANG_CPP,), + checks=[CheckOverallScore(0, 100), CheckTimeoutWall()]), + + Test('interactive-tle', + task=interactive, filenames=['interactive-tle.%l'], + languages=(LANG_CPP,), + checks=[CheckOverallScore(0, 100), CheckTimeout()]), + + Test('interactive-crash', + task=interactive, filenames=['interactive-crash.%l'], + languages=(LANG_CPP,), + checks=[CheckOverallScore(0, 100), CheckNonzeroReturn()]), + + Test('interactive-wrong-protocol', + task=interactive, filenames=['interactive-wrong-protocol.%l'], + languages=(LANG_CPP,), + checks=[CheckOverallScore(0, 100)]), + + Test('interactive-many-correct', + task=interactive_many, filenames=['interactive-many-correct.%l'], + languages=(LANG_CPP, LANG_CPP14, LANG_CPP17, LANG_CPP20), + checks=[CheckOverallScore(100, 100)]), + + Test('interactive-many-wrong', + task=interactive_many, filenames=['interactive-many-wrong.%l'], + languages=(LANG_CPP,), + checks=[CheckOverallScore(0, 100)]), + + Test('interactive-many-early-wa', + task=interactive_many, filenames=['interactive-many-early-wa.%l'], + languages=(LANG_CPP,), + checks=[CheckOverallScore(0, 100)]), + + Test('interactive-many-sleep', + task=interactive_many, filenames=['interactive-many-sleep.%l'], + languages=(LANG_CPP,), + checks=[CheckOverallScore(0, 100), CheckTimeoutWall()]), + + Test('interactive-many-tle', + task=interactive_many, filenames=['interactive-many-tle.%l'], + languages=(LANG_CPP,), + checks=[CheckOverallScore(0, 100), CheckTimeout()]), + + Test('interactive-many-crash', + task=interactive_many, 
filenames=['interactive-many-crash.%l'], + languages=(LANG_CPP,), + checks=[CheckOverallScore(0, 100), CheckNonzeroReturn()]), + Test('correct-freopen', task=batch_fileio, filenames=['correct-freopen.%l'], languages=(LANG_C,), diff --git a/cmstestsuite/code/interactive-correct.cpp b/cmstestsuite/code/interactive-correct.cpp new file mode 100644 index 0000000000..7d3268d8b3 --- /dev/null +++ b/cmstestsuite/code/interactive-correct.cpp @@ -0,0 +1,11 @@ +#include + +int op(int code, int a, int b) { + if (code == 0) + return a + b; + else if (code == 1) + return a * b; + else + assert(false); + return 0; +} diff --git a/cmstestsuite/code/interactive-correct.py b/cmstestsuite/code/interactive-correct.py new file mode 100644 index 0000000000..e25afe81d7 --- /dev/null +++ b/cmstestsuite/code/interactive-correct.py @@ -0,0 +1,27 @@ +import sys + +def op(code, a, b): + if code == 0: + return a + b + elif code == 1: + return a * b + return 0 + +def main(): + try: + line = sys.stdin.readline() + if not line: return + a, b = map(int, line.split()) + print(op(0, a, b)) + sys.stdout.flush() + + line = sys.stdin.readline() + if not line: return + a, b = map(int, line.split()) + print(op(1, a, b)) + sys.stdout.flush() + except EOFError: + pass + +if __name__ == "__main__": + main() diff --git a/cmstestsuite/code/interactive-crash.cpp b/cmstestsuite/code/interactive-crash.cpp new file mode 100644 index 0000000000..33f4450476 --- /dev/null +++ b/cmstestsuite/code/interactive-crash.cpp @@ -0,0 +1,5 @@ +#include +int op(int code, int a, int b) { + exit(1); + return 0; +} diff --git a/cmstestsuite/code/interactive-many-correct.cpp b/cmstestsuite/code/interactive-many-correct.cpp new file mode 100644 index 0000000000..ce7737f447 --- /dev/null +++ b/cmstestsuite/code/interactive-many-correct.cpp @@ -0,0 +1,8 @@ +#include +int main() { + int v; + if (scanf("%d", &v) != 1) return 1; + printf("%d\n", v + 1); + fflush(stdout); + return 0; +} diff --git 
a/cmstestsuite/code/interactive-many-crash.cpp b/cmstestsuite/code/interactive-many-crash.cpp new file mode 100644 index 0000000000..3478e6873e --- /dev/null +++ b/cmstestsuite/code/interactive-many-crash.cpp @@ -0,0 +1,4 @@ +#include +int main() { + exit(1); +} diff --git a/cmstestsuite/code/interactive-many-early-wa.cpp b/cmstestsuite/code/interactive-many-early-wa.cpp new file mode 100644 index 0000000000..673a9adf3a --- /dev/null +++ b/cmstestsuite/code/interactive-many-early-wa.cpp @@ -0,0 +1,9 @@ +#include +int main() { + int v; + if (scanf("%d", &v) != 1) + return 1; + printf("-1\n"); // Definitely not v+1 + fflush(stdout); + return 0; +} diff --git a/cmstestsuite/code/interactive-many-sleep.cpp b/cmstestsuite/code/interactive-many-sleep.cpp new file mode 100644 index 0000000000..e5a8f11e0f --- /dev/null +++ b/cmstestsuite/code/interactive-many-sleep.cpp @@ -0,0 +1,7 @@ +#include +int main() { + while (1) { + sleep(1); + } + return 0; +} diff --git a/cmstestsuite/code/interactive-many-tle.cpp b/cmstestsuite/code/interactive-many-tle.cpp new file mode 100644 index 0000000000..76487538b0 --- /dev/null +++ b/cmstestsuite/code/interactive-many-tle.cpp @@ -0,0 +1,5 @@ +int main() { + while (1) + ; + return 0; +} diff --git a/cmstestsuite/code/interactive-many-wrong.cpp b/cmstestsuite/code/interactive-many-wrong.cpp new file mode 100644 index 0000000000..0fa64f8927 --- /dev/null +++ b/cmstestsuite/code/interactive-many-wrong.cpp @@ -0,0 +1,14 @@ +#include +int main() { + int v; + if (scanf("%d", &v) != 1) + return 1; + if (v != 19) { + printf("%d\n", v + 1); + } else { + // Wrong: should be v + 1 + printf("%d\n", v); + } + fflush(stdout); + return 0; +} diff --git a/cmstestsuite/code/interactive-sleep.cpp b/cmstestsuite/code/interactive-sleep.cpp new file mode 100644 index 0000000000..fa950fc57f --- /dev/null +++ b/cmstestsuite/code/interactive-sleep.cpp @@ -0,0 +1,7 @@ +#include +int op(int code, int a, int b) { + while (true) { + sleep(1); + } + return 0; +} 
diff --git a/cmstestsuite/code/interactive-tle.cpp b/cmstestsuite/code/interactive-tle.cpp new file mode 100644 index 0000000000..92a934561f --- /dev/null +++ b/cmstestsuite/code/interactive-tle.cpp @@ -0,0 +1,5 @@ +int op(int code, int a, int b) { + while (1) + ; + return 0; +} diff --git a/cmstestsuite/code/interactive-wrong-protocol.cpp b/cmstestsuite/code/interactive-wrong-protocol.cpp new file mode 100644 index 0000000000..6b3c949180 --- /dev/null +++ b/cmstestsuite/code/interactive-wrong-protocol.cpp @@ -0,0 +1,9 @@ +#include +#include + +int op(int code, int a, int b) { + printf("protocol violation\n"); + fflush(stdout); + exit(0); + return 0; +} diff --git a/cmstestsuite/code/interactive-wrong.cpp b/cmstestsuite/code/interactive-wrong.cpp new file mode 100644 index 0000000000..b6de2a8ce3 --- /dev/null +++ b/cmstestsuite/code/interactive-wrong.cpp @@ -0,0 +1,10 @@ +#include +int op(int code, int a, int b) { + if (code == 0) + return a * b; + else if (code == 1) + return a + b; + else + assert(false); + return 0; +} diff --git a/cmstestsuite/tasks/interactive/__init__.py b/cmstestsuite/tasks/interactive/__init__.py new file mode 100644 index 0000000000..f9b81af122 --- /dev/null +++ b/cmstestsuite/tasks/interactive/__init__.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 + +# Contest Management System - http://cms-dev.github.io/ +# Copyright © 2026 Luca Versari +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. 
+# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + + +task_info = { + "name": "interactive", + "title": "Test Interactive Task", + "official_language": "", + "submission_format_choice": "other", + "submission_format": "interactive.%l", + "time_limit_{{dataset_id}}": "1.0", + "memory_limit_{{dataset_id}}": "128", + "task_type_{{dataset_id}}": "Interactive", + "TaskTypeOptions_{{dataset_id}}_Interactive_compilation": "stub", + "TaskTypeOptions_{{dataset_id}}_Interactive_process_limit": "200", + "TaskTypeOptions_{{dataset_id}}_Interactive_concurrent": "true", + "TaskTypeOptions_{{dataset_id}}_Interactive_controller_time_limit": "1.0", + "TaskTypeOptions_{{dataset_id}}_Interactive_controller_wall_limit": "5.0", + "TaskTypeOptions_{{dataset_id}}_Interactive_controller_memory_limit": "128.0", + "score_type_{{dataset_id}}": "Sum", + "score_type_parameters_{{dataset_id}}": "50", +} + +managers = [ + "controller", + "stub.cpp", + "stub.py", +] + +test_cases = [ + ("input0.txt", "input0.out", True), + ("input1.txt", "input1.out", True), +] diff --git a/cmstestsuite/tasks/interactive/code/controller b/cmstestsuite/tasks/interactive/code/controller new file mode 100755 index 0000000000..12e61c2c82 --- /dev/null +++ b/cmstestsuite/tasks/interactive/code/controller @@ -0,0 +1,67 @@ +#!/usr/bin/python3 -sS + +import sys +import os + + +def grade(score, msg, admin_msg=None): + print("SCORE: %.6f" % score, file=sys.stderr) + print("USER_MESSAGE: %s" % msg, file=sys.stderr) + if admin_msg: + print("ADMIN_MESSAGE: %s" % admin_msg, file=sys.stderr) + sys.stderr.flush() + sys.exit(0) + + +def main(): + try: + with open("input.txt", "r") as f: + line = f.read().split() + if len(line) < 3: + grade(0.0, "Failed to read input", "split failed") + a, b, c = map(int, line[:3]) + except Exception as e: + grade(0.0, "Failed to read input", str(e)) + + print("START_SOLUTION") + sys.stdout.flush() + + line = 
sys.stdin.readline() + if not line: + grade(0.0, "Failed to read FDs", "EOF on stdin") + try: + fdin, fdout = map(int, line.split()) + except Exception as e: + grade(0.0, "Failed to read FDs", str(e)) + + try: + to_sol = os.fdopen(fdin, "w") + from_sol = os.fdopen(fdout, "r") + + to_sol.write("%d %d\n" % (a, b)) + to_sol.flush() + + res1 = from_sol.readline() + if not res1: + grade(0.0, "Ko1!", "EOF from solution 1") + res1 = int(res1.strip()) + + to_sol.write("%d %d\n" % (res1, c)) + to_sol.flush() + + res2 = from_sol.readline() + if not res2: + grade(0.0, "Ko2!", "EOF from solution 2") + res2 = int(res2.strip()) + + if res2 == (a + b) * c: + grade(1.0, "OK!") + else: + grade(0.0, "Wrong answer", "Expected %d, got %d" % ((a + b) * c, res2)) + + except Exception as e: + grade(0.0, "Internal error in controller", str(e)) + + +if __name__ == "__main__": + main() diff --git a/cmstestsuite/tasks/interactive/code/stub.cpp b/cmstestsuite/tasks/interactive/code/stub.cpp new file mode 100644 index 0000000000..5922010cc9 --- /dev/null +++ b/cmstestsuite/tasks/interactive/code/stub.cpp @@ -0,0 +1,21 @@ +#include +#include +#include + +int op(int code, int a, int b); + +int main() { + int a, b; + if (scanf("%d %d", &a, &b) != 2) { + return 1; + } + printf("%d\n", op(0, a, b)); + fflush(stdout); + + if (scanf("%d %d", &a, &b) != 2) + return 1; + printf("%d\n", op(1, a, b)); + fflush(stdout); + + return 0; +} diff --git a/cmstestsuite/tasks/interactive/code/stub.py b/cmstestsuite/tasks/interactive/code/stub.py new file mode 100644 index 0000000000..ccd958433f --- /dev/null +++ b/cmstestsuite/tasks/interactive/code/stub.py @@ -0,0 +1,21 @@ +import sys +import interactive + +def main(): + try: + line = sys.stdin.readline() + if not line: return + a, b = map(int, line.split()) + print(interactive.op(0, a, b)) + sys.stdout.flush() + + line = sys.stdin.readline() + if not line: return + a, b = map(int, line.split()) + print(interactive.op(1, a, b)) + sys.stdout.flush() + except 
EOFError: + pass + +if __name__ == "__main__": + main() diff --git a/cmstestsuite/tasks/interactive/data/input0.out b/cmstestsuite/tasks/interactive/data/input0.out new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cmstestsuite/tasks/interactive/data/input0.txt b/cmstestsuite/tasks/interactive/data/input0.txt new file mode 100644 index 0000000000..797d32b74a --- /dev/null +++ b/cmstestsuite/tasks/interactive/data/input0.txt @@ -0,0 +1 @@ +10 2 30 diff --git a/cmstestsuite/tasks/interactive/data/input1.out b/cmstestsuite/tasks/interactive/data/input1.out new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cmstestsuite/tasks/interactive/data/input1.txt b/cmstestsuite/tasks/interactive/data/input1.txt new file mode 100644 index 0000000000..b85905ec0b --- /dev/null +++ b/cmstestsuite/tasks/interactive/data/input1.txt @@ -0,0 +1 @@ +1 2 3 diff --git a/cmstestsuite/tasks/interactive/data/input2.out b/cmstestsuite/tasks/interactive/data/input2.out new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cmstestsuite/tasks/interactive/data/input2.txt b/cmstestsuite/tasks/interactive/data/input2.txt new file mode 100644 index 0000000000..3aacd417fc --- /dev/null +++ b/cmstestsuite/tasks/interactive/data/input2.txt @@ -0,0 +1 @@ +2 3 4 diff --git a/cmstestsuite/tasks/interactive_many/__init__.py b/cmstestsuite/tasks/interactive_many/__init__.py new file mode 100644 index 0000000000..cf5467013a --- /dev/null +++ b/cmstestsuite/tasks/interactive_many/__init__.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 + +# Contest Management System - http://cms-dev.github.io/ +# Copyright © 2026 Luca Versari +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +task_info = { + "name": "interactivemany", + "title": "Test Interactive Many Solutions Task", + "official_language": "", + "submission_format_choice": "other", + "submission_format": "interactivemany.%l", + "time_limit_{{dataset_id}}": "1.0", + "memory_limit_{{dataset_id}}": "128", + "task_type_{{dataset_id}}": "Interactive", + "TaskTypeOptions_{{dataset_id}}_Interactive_compilation": "alone", + "TaskTypeOptions_{{dataset_id}}_Interactive_process_limit": "200", + "TaskTypeOptions_{{dataset_id}}_Interactive_concurrent": "true", + "TaskTypeOptions_{{dataset_id}}_Interactive_controller_time_limit": "1.0", + "TaskTypeOptions_{{dataset_id}}_Interactive_controller_wall_limit": "5.0", + "TaskTypeOptions_{{dataset_id}}_Interactive_controller_memory_limit": "512.0", + "score_type_{{dataset_id}}": "Sum", + "score_type_parameters_{{dataset_id}}": "100", +} + +managers = [ + "controller", +] + +test_cases = [ + ("input0.txt", "input0.out", True), +] diff --git a/cmstestsuite/tasks/interactive_many/code/controller b/cmstestsuite/tasks/interactive_many/code/controller new file mode 100755 index 0000000000..cc0a44f482 --- /dev/null +++ b/cmstestsuite/tasks/interactive_many/code/controller @@ -0,0 +1,59 @@ +#!/usr/bin/python3 -sS + +import sys +import os + + +def grade(score, msg, admin_msg=None): + print("SCORE: %.6f" % score, file=sys.stderr) + print("USER_MESSAGE: %s" % msg, file=sys.stderr) + if admin_msg: + print("ADMIN_MESSAGE: %s" % admin_msg, file=sys.stderr) + sys.stderr.flush() + sys.exit(0) + + +def main(): + num_solutions = 20 + v = 0 + for i in range(num_solutions): + print("START_SOLUTION") + 
sys.stdout.flush() + + line = sys.stdin.readline() + if not line: + grade(0.0, "Ko!", "EOF on stdin at step %d" % i) + try: + fdin, fdout = map(int, line.split()) + except Exception as e: + grade(0.0, "Ko!", "Failed to read FDs at step %d: %s" % (i, str(e))) + + try: + to_sol = os.fdopen(fdin, "w") + from_sol = os.fdopen(fdout, "r") + + to_sol.write("%d\n" % v) + to_sol.flush() + + res = from_sol.readline() + if not res: + grade(0.0, "Ko!", "no output at step %d" % i) + + v = int(res.strip()) + if v != i + 1: + grade( + 0.0, + "Ko!", + "wrong output at step %d: expected %d, got %d" % (i, i + 1, v), + ) + + to_sol.close() + from_sol.close() + except Exception as e: + grade(0.0, "Ko!", "Internal error at step %d: %s" % (i, str(e))) + + grade(1.0, "OK!") + + +if __name__ == "__main__": + main() diff --git a/cmstestsuite/tasks/interactive_many/data/input0.out b/cmstestsuite/tasks/interactive_many/data/input0.out new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cmstestsuite/tasks/interactive_many/data/input0.txt b/cmstestsuite/tasks/interactive_many/data/input0.txt new file mode 100644 index 0000000000..421376db9e --- /dev/null +++ b/cmstestsuite/tasks/interactive_many/data/input0.txt @@ -0,0 +1 @@ +dummy diff --git a/docs/Task types.rst b/docs/Task types.rst index ccb30a5700..81a1edc4c9 100644 --- a/docs/Task types.rst +++ b/docs/Task types.rst @@ -16,7 +16,7 @@ An exception to this is when the contestant's source fails (for example, exceedi Standard task types =================== -CMS ships with four task types: Batch, OutputOnly, Communication, TwoSteps. The first three are well tested and reasonably strong against cheating attempts and stable with respect to the evaluation times. TwoSteps is a somewhat simpler way to implement a special case of a Communication task, but it is substantially less secure with respect to cheating. We suggest avoiding TwoSteps for new tasks, and migrating old tasks to Communication. 
+CMS ships with five task types: Batch, OutputOnly, Communication, Interactive, TwoSteps. The first three are well tested and reasonably strong against cheating attempts and stable with respect to the evaluation times. Interactive is a more recent addition and is not yet as well-tested. TwoSteps is a somewhat simpler way to implement a special case of a Communication task, but it is substantially less secure with respect to cheating. We suggest avoiding TwoSteps for new tasks, and migrating old tasks to Communication. OutputOnly does not involve programming languages. Batch is tested with all languages CMS supports out of the box, (C, C++, Pascal, Java, C#, Python, PHP, Haskell, Rust), but only with the first five when using a grader. Communication is tested with C, C++, Pascal and Java. TwoSteps only with C. Regardless, with some work all task types should work with all languages. @@ -103,6 +103,47 @@ Communication supports user tests. In addition to the input file, contestant mus .. note:: Particular care must be taken for tasks where the communication through the FIFOs is particularly large or frequent. In these cases, the time to send the data may dominate the actual algorithm runtime, thus making it hard to distinguish between different complexities. +.. _tasktypes_interactive: + +Interactive +----------- + +Interactive tasks are similar to Communication tasks, but they allow the admin-provided manager (called a ``controller``) to dynamically spawn and interact with multiple solution instances. + +The admins must provide an executable manager called ``controller``. The controller is responsible for communicating with CMS to spawn solution processes and for evaluating their behavior. + +The communication between the controller and CMS follows a specific protocol: + +- To start a new solution process, the controller must print ``START_SOLUTION`` followed by a newline to its standard output. 
+- After each such command, the controller must read two integers from its standard input. These represent the file descriptors (fds) for writing to and reading from the solution process, respectively. +- The controller can then use these file descriptors (for example, with ``fdopen``) to communicate with the solution. + +The controller must report the final outcome of the testcase by writing to its standard error using the following prefixes: + +- ``SCORE: <score>``: the outcome of the testcase, as a floating point number between 0.0 and 1.0. +- ``USER_MESSAGE: <message>``: a message for the contestant. +- ``ADMIN_MESSAGE: <message>``: an optional message for the administrators. + +Special messages like ``translate:success``, ``translate:wrong`` and ``translate:partial`` are supported for the user message. + +Interactive has six parameters: + +- whether the contestant's source is compiled on its own (``alone``) or with an admin-provided stub called :file:`stub.{ext}` (``stub``); +- the maximum number of solution instances the controller can spawn (``process_limit``); +- whether the spawned solutions are assumed to run concurrently (``concurrent``); +- the maximum memory (in MB) that the controller can use; +- the maximum CPU time (in seconds) that the controller can use; +- the maximum wall time (in seconds) that the controller can use. + +The ``concurrent`` parameter controls how the total wall clock time and total memory usage of the submission are computed. If concurrent, the reported wall clock time is the maximum over the spawned solutions and the reported memory usage is their sum; if not concurrent, the wall clock times are added and the reported memory usage is the maximum. This only affects the display of statistics, not the actual verdict (for the verdict, each spawned solution gets the full time/memory limit). Also note that CPU times are always added. 
+ +Note that in the current implementation, all file descriptors for communication are preallocated before running the controller, which means that the controller always uses (at least) ``2*process_limit`` file descriptors. Thus it is not a good idea to set process_limit significantly higher than necessary. + +Admins can provide a manager called :file:`stub.{ext}` for each allowed language. The stub serves the same purpose as in the Communication task type. + +Interactive supports user tests. Contestants must provide the stub and their source file. The admin-provided controller will be used. + + TwoSteps -------- diff --git a/setup.py b/setup.py index 210106a98d..be3517f45f 100755 --- a/setup.py +++ b/setup.py @@ -79,6 +79,10 @@ "tasks/communication_stdio/data/*.*", "tasks/communication_stdio_stubbed/code/*", "tasks/communication_stdio_stubbed/data/*.*", + "tasks/interactive/code/*", + "tasks/interactive/data/*.*", + "tasks/interactive_many/code/*", + "tasks/interactive_many/data/*.*", "tasks/outputonly/data/*.*", "tasks/outputonly_comparator/code/*", "tasks/outputonly_comparator/data/*.*", @@ -150,6 +154,7 @@ class build_with_l10n(build): "Batch=cms.grading.tasktypes.Batch:Batch", "BatchAndOutput=cms.grading.tasktypes.BatchAndOutput:BatchAndOutput", "Communication=cms.grading.tasktypes.Communication:Communication", + "Interactive=cms.grading.tasktypes.Interactive:Interactive", "OutputOnly=cms.grading.tasktypes.OutputOnly:OutputOnly", "TwoSteps=cms.grading.tasktypes.TwoSteps:TwoSteps", ],