From e24a93bbfd85e3ae7d779efffbb98edc950cf677 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Tue, 21 Apr 2026 00:34:20 -0700 Subject: [PATCH 1/2] add modal b200 submission rate limit --- src/kernelbot/api/main.py | 9 +++- src/libkernelbot/backend.py | 16 ++++++- src/libkernelbot/consts.py | 3 ++ src/libkernelbot/leaderboard_db.py | 44 ++++++++++++++++++- src/libkernelbot/submission.py | 38 +++++++++++++++- ...60421_01_modal-b200-rate-limit-metadata.py | 29 ++++++++++++ tests/test_leaderboard_db.py | 44 +++++++++++++++++++ tests/test_submission.py | 27 ++++++++++++ 8 files changed, 204 insertions(+), 6 deletions(-) create mode 100644 src/migrations/20260421_01_modal-b200-rate-limit-metadata.py diff --git a/src/kernelbot/api/main.py b/src/kernelbot/api/main.py index 3417efa1..52a3cd6d 100644 --- a/src/kernelbot/api/main.py +++ b/src/kernelbot/api/main.py @@ -15,12 +15,17 @@ from libkernelbot.background_submission_manager import BackgroundSubmissionManager from libkernelbot.consts import SubmissionMode from libkernelbot.db_types import IdentityType -from libkernelbot.kernelguard import KernelGuardRejected, enforce_submission_precheck, should_precheck_submission +from libkernelbot.kernelguard import ( + KernelGuardRejected, + enforce_submission_precheck, + should_precheck_submission, +) from libkernelbot.leaderboard_db import LeaderboardDB, LeaderboardRankedEntry from libkernelbot.problem_sync import sync_problems from libkernelbot.submission import ( ProcessedSubmissionRequest, SubmissionRequest, + enforce_gpu_rate_limits, prepare_submission, ) from libkernelbot.task import make_task_definition @@ -500,6 +505,7 @@ async def enqueue_background_job( ): # pre-create the submission for api returns with backend.db as db: + enforce_gpu_rate_limits(req, db) sub_id = db.create_submission( leaderboard=req.leaderboard, file_name=req.file_name, @@ -508,6 +514,7 @@ async def enqueue_background_job( time=datetime.datetime.now(datetime.timezone.utc), user_name=req.user_name, mode_category=req.mode_category, + requested_gpus=req.gpus, ) job_id = db.upsert_submission_job_status(sub_id, "initial", None) # put submission request in queue diff --git a/src/libkernelbot/backend.py b/src/libkernelbot/backend.py index 60214b28..ec7f509f 100644 --- a/src/libkernelbot/backend.py +++ b/src/libkernelbot/backend.py @@ -4,7 +4,13 @@ from types import SimpleNamespace from typing import Optional -from libkernelbot.consts import GPU, GPU_TO_SM, SubmissionMode, get_gpu_by_name, get_mode_category +from libkernelbot.consts import ( + GPU, + GPU_TO_SM, + SubmissionMode, + get_gpu_by_name, + get_mode_category, +) from libkernelbot.kernelguard import ( KernelGuardRejected, enforce_submission_precheck, @@ -19,7 +25,11 @@ make_short_report, ) from libkernelbot.run_eval import FullResult -from libkernelbot.submission import ProcessedSubmissionRequest, compute_score +from libkernelbot.submission import ( + ProcessedSubmissionRequest, + compute_score, + enforce_gpu_rate_limits, +) from libkernelbot.task import LeaderboardTask, build_task_config from libkernelbot.utils import setup_logging @@ -68,6 +78,7 @@ async def submit_full( sub_id = pre_sub_id else: with self.db as db: + enforce_gpu_rate_limits(req, db) sub_id = db.create_submission( leaderboard=req.leaderboard, file_name=req.file_name, @@ -76,6 +87,7 @@ async def submit_full( time=datetime.datetime.now(datetime.timezone.utc), user_name=req.user_name, mode_category=req.mode_category or get_mode_category(mode), + requested_gpus=req.gpus, ) selected_gpus = [get_gpu_by_name(gpu) for gpu in req.gpus] submission_started = False diff --git a/src/libkernelbot/consts.py b/src/libkernelbot/consts.py index 55113e76..f9e4b64f 100644 --- a/src/libkernelbot/consts.py +++ b/src/libkernelbot/consts.py @@ -119,6 +119,9 @@ class RankCriterion(Enum): GEOM = "geom" # geometric mean of all benchmarks +MODAL_B200_MAX_SUBMISSIONS_PER_HOUR = 1 + + GPU_TO_SM = { "T4": "75", "L4": "89", diff --git a/src/libkernelbot/leaderboard_db.py b/src/libkernelbot/leaderboard_db.py index 5ab23924..5b4f72c8 100644 --- a/src/libkernelbot/leaderboard_db.py +++ b/src/libkernelbot/leaderboard_db.py @@ -278,11 +278,16 @@ def create_submission( time: datetime.datetime, user_name: str = None, mode_category: str = None, + requested_gpus: Optional[list[str] | str] = None, ) -> Optional[int]: try: if time.tzinfo is None: time = time.astimezone() time = time.astimezone(datetime.timezone.utc) + if requested_gpus is None: + requested_gpus = [] + elif isinstance(requested_gpus, str): + requested_gpus = [requested_gpus] # check if we already have the code self.cursor.execute( @@ -329,10 +334,10 @@ def create_submission( self.cursor.execute( """ INSERT INTO leaderboard.submission (leaderboard_id, file_name, - user_id, code_id, submission_time, mode_category) + user_id, code_id, submission_time, mode_category, requested_gpus) VALUES ( (SELECT id FROM leaderboard.leaderboard WHERE name = %s), - %s, %s, %s, %s, %s) + %s, %s, %s, %s, %s, %s) RETURNING id """, ( @@ -342,6 +347,7 @@ def create_submission( code_id, time, mode_category, + requested_gpus, ), ) submission_id = self.cursor.fetchone()[0] @@ -1778,6 +1784,40 @@ def check_rate_limit( logger.exception("Error checking rate limit", exc_info=e) raise KernelBotError("Error checking rate limit") from e + def check_gpu_submission_rate_limit( + self, user_id: str, gpu_type: str, max_per_hour: int + ) -> dict: + """Check if a user has exceeded a per-GPU submission limit over the last hour.""" + try: + self.cursor.execute( + """ + SELECT COUNT(*), MIN(submission_time) + FROM leaderboard.submission + WHERE user_id = %s + AND requested_gpus @> ARRAY[%s]::TEXT[] + AND submission_time > NOW() - INTERVAL '1 hour' + """, + (str(user_id), gpu_type), + ) + current_count, oldest_time = self.cursor.fetchone() + allowed = current_count < max_per_hour + retry_after = 0 + if not allowed and oldest_time is not None: + expiry = oldest_time + datetime.timedelta(hours=1) + now = datetime.datetime.now(datetime.timezone.utc) + retry_after = max(0, int((expiry - now).total_seconds())) + + return { + "allowed": allowed, + "current_count": current_count, + "max_per_hour": max_per_hour, + "retry_after_seconds": retry_after, + } + except psycopg2.Error as e: + self.connection.rollback() + logger.exception("Error checking GPU submission rate limit", exc_info=e) + raise KernelBotError("Error checking GPU submission rate limit") from e + class LeaderboardDoesNotExist(KernelBotError): def __init__(self, name: str): diff --git a/src/libkernelbot/submission.py b/src/libkernelbot/submission.py index 69b83b24..b6595418 100644 --- a/src/libkernelbot/submission.py +++ b/src/libkernelbot/submission.py @@ -7,7 +7,13 @@ from better_profanity import profanity -from libkernelbot.consts import RankCriterion, SubmissionMode, get_mode_category +from libkernelbot.consts import ( + MODAL_B200_MAX_SUBMISSIONS_PER_HOUR, + ModalGPU, + RankCriterion, + SubmissionMode, + get_mode_category, +) from libkernelbot.db_types import RunItem, SubmissionItem from libkernelbot.leaderboard_db import LeaderboardDB, LeaderboardItem from libkernelbot.run_eval import FullResult @@ -41,6 +47,36 @@ class ProcessedSubmissionRequest(SubmissionRequest): mode_category: str = None +def normalize_requested_gpus(gpus: Union[None, str, list]) -> list[str]: + if gpus is None: + return [] + if isinstance(gpus, str): + return [gpus] + return list(gpus) + + +def enforce_gpu_rate_limits(req: SubmissionRequest, db: LeaderboardDB) -> None: + requested_gpus = normalize_requested_gpus(req.gpus) + if ModalGPU.B200.value not in requested_gpus: + return + + rate_check = db.check_gpu_submission_rate_limit( + str(req.user_id), + ModalGPU.B200.value, + MODAL_B200_MAX_SUBMISSIONS_PER_HOUR, + ) + if rate_check["allowed"]: + return + + raise KernelBotError( + "Rate limit exceeded: " + f"{rate_check['current_count']}/{rate_check['max_per_hour']} Modal B200 submissions " + "per hour. " + f"Try again in {rate_check['retry_after_seconds']}s.", + code=429, + ) + + def prepare_submission( # noqa: C901 req: SubmissionRequest, backend: "KernelBackend", mode: SubmissionMode = None ) -> ProcessedSubmissionRequest: diff --git a/src/migrations/20260421_01_modal-b200-rate-limit-metadata.py b/src/migrations/20260421_01_modal-b200-rate-limit-metadata.py new file mode 100644 index 00000000..a228e5a6 --- /dev/null +++ b/src/migrations/20260421_01_modal-b200-rate-limit-metadata.py @@ -0,0 +1,29 @@ +""" +Track requested GPUs on submission rows so GPU-specific rate limits can apply before queueing. +""" + +from yoyo import step + +__depends__ = {"20260318_01_ban-user"} + +steps = [ + step( + """ + ALTER TABLE leaderboard.submission + ADD COLUMN requested_gpus TEXT[] NOT NULL DEFAULT ARRAY[]::TEXT[]; + """, + """ + ALTER TABLE leaderboard.submission + DROP COLUMN requested_gpus; + """, + ), + step( + """ + CREATE INDEX leaderboard_submission_requested_gpus_idx + ON leaderboard.submission USING GIN (requested_gpus); + """, + """ + DROP INDEX leaderboard.leaderboard_submission_requested_gpus_idx; + """, + ), +] diff --git a/tests/test_leaderboard_db.py b/tests/test_leaderboard_db.py index 1ebf3b29..00af243f 100644 --- a/tests/test_leaderboard_db.py +++ b/tests/test_leaderboard_db.py @@ -1198,3 +1198,47 @@ def test_check_rate_limit_categories_independent(database, submit_leaderboard): # Test should be blocked result = db.check_rate_limit("submit-leaderboard", "123", "test") assert result["allowed"] is False + + +def test_check_gpu_submission_rate_limit_under_limit(database, submit_leaderboard): + """GPU-specific rate limit counts only submissions that requested that GPU.""" + with database as db: + db.create_submission( + "submit-leaderboard", + "test.py", + 123, + "code1", + datetime.datetime.now(), + requested_gpus=["A100", "B200"], + ) + db.create_submission( + "submit-leaderboard", + "other.py", + 123, + "code2", + datetime.datetime.now(), + requested_gpus=["A100"], + ) + result = db.check_gpu_submission_rate_limit("123", "B200", 2) + assert result["allowed"] is True + assert result["current_count"] == 1 + assert result["max_per_hour"] == 2 + + +def test_check_gpu_submission_rate_limit_at_limit(database, submit_leaderboard): + """GPU-specific rate limit blocks once the hourly cap is reached.""" + with database as db: + for i in range(2): + db.create_submission( + "submit-leaderboard", + f"test{i}.py", + 123, + f"code{i}", + datetime.datetime.now(), + requested_gpus=["B200"], + ) + result = db.check_gpu_submission_rate_limit("123", "B200", 2) + assert result["allowed"] is False + assert result["current_count"] == 2 + assert result["max_per_hour"] == 2 + assert result["retry_after_seconds"] >= 0 diff --git a/tests/test_submission.py b/tests/test_submission.py index f2bced05..1f5c3f6e 100644 --- a/tests/test_submission.py +++ b/tests/test_submission.py @@ -32,6 +32,13 @@ def mock_backend(): } db_context.get_leaderboard_gpu_types.return_value = ["A100", "V100"] db_context.is_user_banned.return_value = False + db_context.check_rate_limit.return_value = None + db_context.check_gpu_submission_rate_limit.return_value = { + "allowed": True, + "current_count": 0, + "max_per_hour": 1, + "retry_after_seconds": 0, + } return backend @@ -297,6 +304,26 @@ def test_prepare_submission_checks(mock_backend): submission.prepare_submission(req, mock_backend) +def test_enforce_gpu_rate_limits_blocks_modal_b200(mock_backend): + mock_backend.db.check_gpu_submission_rate_limit.return_value = { + "allowed": False, + "current_count": 1, + "max_per_hour": 1, + "retry_after_seconds": 123, + } + req = submission.SubmissionRequest( + code="print('hello world')", + file_name="test.py", + user_id=2, + user_name="test_user2", + gpus=["B200"], + leaderboard="test_board", + ) + + with pytest.raises(KernelBotError, match="Modal B200 submissions per hour"): + submission.enforce_gpu_rate_limits(req, mock_backend.db) + + def test_compute_score(): mock_task = mock.Mock() mock_result = mock.Mock() From 758bf59f8266d777728b8a11c59c41ada9dc0206 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Tue, 21 Apr 2026 00:36:55 -0700 Subject: [PATCH 2/2] dedupe submission creation path --- src/kernelbot/api/main.py | 13 +------------ src/libkernelbot/backend.py | 31 +++++++++++++++++++------------ 2 files changed, 20 insertions(+), 24 deletions(-) diff --git a/src/kernelbot/api/main.py b/src/kernelbot/api/main.py index 52a3cd6d..4434731e 100644 --- a/src/kernelbot/api/main.py +++ b/src/kernelbot/api/main.py @@ -25,7 +25,6 @@ from libkernelbot.submission import ( ProcessedSubmissionRequest, SubmissionRequest, - enforce_gpu_rate_limits, prepare_submission, ) from libkernelbot.task import make_task_definition @@ -504,18 +503,8 @@ async def enqueue_background_job( manager: BackgroundSubmissionManager, ): # pre-create the submission for api returns + sub_id = backend.create_submission_record(req, mode) with backend.db as db: - enforce_gpu_rate_limits(req, db) - sub_id = db.create_submission( - leaderboard=req.leaderboard, - file_name=req.file_name, - code=req.code, - user_id=req.user_id, - time=datetime.datetime.now(datetime.timezone.utc), - user_name=req.user_name, - mode_category=req.mode_category, - requested_gpus=req.gpus, - ) job_id = db.upsert_submission_job_status(sub_id, "initial", None) # put submission request in queue await manager.enqueue(req, mode, sub_id) diff --git a/src/libkernelbot/backend.py b/src/libkernelbot/backend.py index ec7f509f..2dbf9a0b 100644 --- a/src/libkernelbot/backend.py +++ b/src/libkernelbot/backend.py @@ -62,6 +62,24 @@ def register_launcher(self, launcher: Launcher): for gpu in launcher.gpus: self.launcher_map[gpu.value] = launcher + def create_submission_record( + self, + req: ProcessedSubmissionRequest, + mode: SubmissionMode, + ) -> int: + with self.db as db: + enforce_gpu_rate_limits(req, db) + return db.create_submission( + leaderboard=req.leaderboard, + file_name=req.file_name, + code=req.code, + user_id=req.user_id, + time=datetime.datetime.now(datetime.timezone.utc), + user_name=req.user_name, + mode_category=req.mode_category or get_mode_category(mode), + requested_gpus=req.gpus, + ) + async def submit_full( self, req: ProcessedSubmissionRequest, @@ -77,18 +95,7 @@ async def submit_full( if pre_sub_id is not None: sub_id = pre_sub_id else: - with self.db as db: - enforce_gpu_rate_limits(req, db) - sub_id = db.create_submission( - leaderboard=req.leaderboard, - file_name=req.file_name, - code=req.code, - user_id=req.user_id, - time=datetime.datetime.now(datetime.timezone.utc), - user_name=req.user_name, - mode_category=req.mode_category or get_mode_category(mode), - requested_gpus=req.gpus, - ) + sub_id = self.create_submission_record(req, mode) selected_gpus = [get_gpu_by_name(gpu) for gpu in req.gpus] submission_started = False try: