build02/nixpkgs-update: scramble order of new jobs
This commit is contained in:
parent
a4b82fa209
commit
6de26b616a
1 changed files with 53 additions and 8 deletions
|
@ -48,10 +48,10 @@ protocol for communication. The following commands are expected:
|
||||||
|
|
||||||
The possible responses to either command are:
|
The possible responses to either command are:
|
||||||
|
|
||||||
NOJOB
|
NOJOBS
|
||||||
JOB <attribute_path> <rest_of_line>
|
JOB <attribute_path> <rest_of_line>
|
||||||
|
|
||||||
When a worker receives NOJOB, it is expected to wait some period of
|
When a worker receives NOJOBS, it is expected to wait some period of
|
||||||
time before asking for jobs again (with READY).
|
time before asking for jobs again (with READY).
|
||||||
|
|
||||||
When a worker receives JOB, it is expected to complete that job and
|
When a worker receives JOB, it is expected to complete that job and
|
||||||
|
@ -63,6 +63,7 @@ This code has a unit test suite in supervisor_test.py.
|
||||||
import asyncio
|
import asyncio
|
||||||
import contextlib
|
import contextlib
|
||||||
import functools
|
import functools
|
||||||
|
import hashlib
|
||||||
import os
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
@ -110,6 +111,18 @@ JOB = b"JOB "
|
||||||
NOJOBS = b"NOJOBS\n"
|
NOJOBS = b"NOJOBS\n"
|
||||||
|
|
||||||
|
|
||||||
|
def jitter_hash(data: str) -> int:
|
||||||
|
"""
|
||||||
|
Deterministically produce a 32-bit integer from a string, not correlated
|
||||||
|
with the sort order of the string.
|
||||||
|
|
||||||
|
The implementation shouldn't matter but if it ever changes, the database
|
||||||
|
queue will need to be recreated.
|
||||||
|
"""
|
||||||
|
h = hashlib.md5(data.encode(), usedforsecurity=False)
|
||||||
|
return int.from_bytes(h.digest()[:4], byteorder="little", signed=True)
|
||||||
|
|
||||||
|
|
||||||
class Storage:
|
class Storage:
|
||||||
"""
|
"""
|
||||||
Abstracts over the database that stores the supervisor state.
|
Abstracts over the database that stores the supervisor state.
|
||||||
|
@ -181,7 +194,7 @@ class Storage:
|
||||||
return decorator if fun is None else decorator(fun)
|
return decorator if fun is None else decorator(fun)
|
||||||
|
|
||||||
@_cursor_method
|
@_cursor_method
|
||||||
def upgrade(self, cur: sqlite3.Cursor, version: int = 1) -> None:
|
def upgrade(self, cur: sqlite3.Cursor, version: int = 2) -> None:
|
||||||
"""
|
"""
|
||||||
Create or update the database schema.
|
Create or update the database schema.
|
||||||
|
|
||||||
|
@ -302,6 +315,38 @@ class Storage:
|
||||||
WHERE `queue`.`attr_path` = NEW.`attr_path`;
|
WHERE `queue`.`attr_path` = NEW.`attr_path`;
|
||||||
END;
|
END;
|
||||||
|
|
||||||
|
COMMIT;
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
if user_version < 2 <= version:
|
||||||
|
# We want to add some disorder to the initial order of packages;
|
||||||
|
# dispatching packages that are alphabetically adjacent increases
|
||||||
|
# the chances of parallel duplicates or getting stuck on a large
|
||||||
|
# block of time-consuming packages.
|
||||||
|
#
|
||||||
|
# Unfortunately, to apply this jitter retroactively we need to
|
||||||
|
# delete most of the rows already in the database.
|
||||||
|
cur.executescript(
|
||||||
|
"""
|
||||||
|
BEGIN;
|
||||||
|
|
||||||
|
DELETE FROM `fetcher_runs`;
|
||||||
|
DELETE FROM `log`;
|
||||||
|
|
||||||
|
ALTER TABLE `queue`
|
||||||
|
ADD COLUMN `order_jitter` `integer`;
|
||||||
|
|
||||||
|
DROP INDEX `queue_order_index`;
|
||||||
|
|
||||||
|
CREATE INDEX `queue_order_index`
|
||||||
|
ON `queue` (
|
||||||
|
`last_started` ASC,
|
||||||
|
`order_jitter`,
|
||||||
|
`attr_path`,
|
||||||
|
`fetcher_id`,
|
||||||
|
`fetcher_run_started` DESC
|
||||||
|
);
|
||||||
|
|
||||||
COMMIT;
|
COMMIT;
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
@ -378,13 +423,13 @@ class Storage:
|
||||||
"""
|
"""
|
||||||
cur.executemany(
|
cur.executemany(
|
||||||
"""
|
"""
|
||||||
INSERT INTO `queue` (`fetcher_id`, `fetcher_run_started`, `attr_path`, `payload`)
|
INSERT INTO `queue` (`fetcher_id`, `fetcher_run_started`, `attr_path`, `payload`, `order_jitter`)
|
||||||
SELECT `fetcher_id`, ?, ?, ?
|
SELECT `fetcher_id`, ?, ?, ?, ?
|
||||||
FROM `fetchers`
|
FROM `fetchers`
|
||||||
WHERE `name` = ?
|
WHERE `name` = ?
|
||||||
ON CONFLICT DO UPDATE SET `payload` = excluded.`payload`
|
ON CONFLICT DO UPDATE SET `payload` = excluded.`payload`
|
||||||
""",
|
""",
|
||||||
((run_started, a, p, fetcher) for a, p in entries),
|
((run_started, a, p, jitter_hash(a), fetcher) for a, p in entries),
|
||||||
)
|
)
|
||||||
|
|
||||||
@_cursor_method(transaction=True)
|
@_cursor_method(transaction=True)
|
||||||
|
@ -414,11 +459,11 @@ class Storage:
|
||||||
`payload`
|
`payload`
|
||||||
FROM `queue`
|
FROM `queue`
|
||||||
LEFT JOIN `log` USING (`attr_path`)
|
LEFT JOIN `log` USING (`attr_path`)
|
||||||
GROUP BY `last_started`, `attr_path`, `fetcher_id`
|
GROUP BY `last_started`, `order_jitter`, `attr_path`, `fetcher_id`
|
||||||
HAVING `is_dequeued` = 0
|
HAVING `is_dequeued` = 0
|
||||||
AND (`started` IS NULL OR `finished` IS NOT NULL OR `started` + 43200 < ?)
|
AND (`started` IS NULL OR `finished` IS NOT NULL OR `started` + 43200 < ?)
|
||||||
AND (TRUE OR `max`(`fetcher_run_started`))
|
AND (TRUE OR `max`(`fetcher_run_started`))
|
||||||
ORDER BY `last_started` ASC, `attr_path`, `fetcher_id`
|
ORDER BY `last_started` ASC, `order_jitter`, `attr_path`, `fetcher_id`
|
||||||
LIMIT 1
|
LIMIT 1
|
||||||
""",
|
""",
|
||||||
(start_time,),
|
(start_time,),
|
||||||
|
|
Loading…
Add table
Reference in a new issue