Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions backend/migrations/0024_illias_course_metadata.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ CREATE TABLE IF NOT EXISTS illias_scrape_runs (
);

CREATE TABLE IF NOT EXISTS illias_courses (
ref_id TEXT PRIMARY KEY,
id INTEGER PRIMARY KEY,
ref_id TEXT NOT NULL UNIQUE,
run_id INTEGER NOT NULL,
title TEXT NOT NULL,
url TEXT NOT NULL,
Expand All @@ -25,22 +26,22 @@ CREATE TABLE IF NOT EXISTS illias_courses (
);

CREATE TABLE IF NOT EXISTS illias_course_fields (
course_ref_id TEXT NOT NULL,
course_id INTEGER NOT NULL,
key TEXT NOT NULL,
value TEXT NOT NULL,
PRIMARY KEY (course_ref_id, key),
FOREIGN KEY (course_ref_id) REFERENCES illias_courses(ref_id) ON DELETE CASCADE
PRIMARY KEY (course_id, key),
FOREIGN KEY (course_id) REFERENCES illias_courses(id) ON DELETE CASCADE
);

CREATE TABLE IF NOT EXISTS illias_alma_matches (
illias_course_ref_id TEXT PRIMARY KEY,
illias_course_id INTEGER PRIMARY KEY,
alma_course_id INTEGER,
confidence REAL NOT NULL,
match_type TEXT NOT NULL,
notes TEXT NOT NULL,
candidate_count INTEGER NOT NULL,
matched_at_unix INTEGER NOT NULL DEFAULT (unixepoch()),
FOREIGN KEY (illias_course_ref_id) REFERENCES illias_courses(ref_id) ON DELETE CASCADE,
FOREIGN KEY (illias_course_id) REFERENCES illias_courses(id) ON DELETE CASCADE,
FOREIGN KEY (alma_course_id) REFERENCES courses(id) ON DELETE SET NULL
);

Expand All @@ -49,4 +50,3 @@ CREATE INDEX IF NOT EXISTS idx_illias_courses_title

CREATE INDEX IF NOT EXISTS idx_illias_alma_matches_alma
ON illias_alma_matches(alma_course_id);

2 changes: 1 addition & 1 deletion backend/src/services/course_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -917,7 +917,7 @@ async def _load_illias_metadata(env: Any, course_id: int) -> dict[str, Any] | No
m.match_type AS matchType,
m.notes
FROM illias_alma_matches AS m
JOIN illias_courses AS ic ON ic.ref_id = m.illias_course_ref_id
JOIN illias_courses AS ic ON ic.id = m.illias_course_id
WHERE m.alma_course_id = ?
ORDER BY m.confidence DESC, ic.title ASC
LIMIT 1
Expand Down
14 changes: 7 additions & 7 deletions data_collection/illias/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def _run_export_sql(args: argparse.Namespace) -> None:
).fetchall()
courses = connection.execute(
"""
SELECT ref_id, run_id, title, url, object_type, description,
SELECT id, ref_id, run_id, title, url, object_type, description,
availability, registration, deadline, max_participants,
tags_json, instructors_json, raw_fields_json, raw_text,
imported_at_unix
Expand All @@ -148,22 +148,22 @@ def _run_export_sql(args: argparse.Namespace) -> None:
).fetchall()
fields = connection.execute(
"""
SELECT f.course_ref_id, f.key, f.value
SELECT f.course_id, f.key, f.value
FROM illias_course_fields AS f
JOIN illias_courses AS c ON c.ref_id = f.course_ref_id
JOIN illias_courses AS c ON c.id = f.course_id
WHERE c.run_id = ?
ORDER BY f.course_ref_id, f.key
ORDER BY f.course_id, f.key
""",
(latest_run_id,),
).fetchall()
matches = connection.execute(
"""
SELECT m.illias_course_ref_id, m.alma_course_id, m.confidence, m.match_type,
SELECT m.illias_course_id, m.alma_course_id, m.confidence, m.match_type,
m.notes, m.candidate_count, m.matched_at_unix
FROM illias_alma_matches AS m
JOIN illias_courses AS c ON c.ref_id = m.illias_course_ref_id
JOIN illias_courses AS c ON c.id = m.illias_course_id
WHERE c.run_id = ?
ORDER BY m.illias_course_ref_id
ORDER BY m.illias_course_id
""",
(latest_run_id,),
).fetchall()
Expand Down
59 changes: 45 additions & 14 deletions data_collection/illias/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
);

CREATE TABLE IF NOT EXISTS illias_courses (
ref_id TEXT PRIMARY KEY,
id INTEGER PRIMARY KEY,
ref_id TEXT NOT NULL UNIQUE,
run_id INTEGER NOT NULL,
title TEXT NOT NULL,
url TEXT NOT NULL,
Expand All @@ -39,22 +40,22 @@
);

CREATE TABLE IF NOT EXISTS illias_course_fields (
course_ref_id TEXT NOT NULL,
course_id INTEGER NOT NULL,
key TEXT NOT NULL,
value TEXT NOT NULL,
PRIMARY KEY (course_ref_id, key),
FOREIGN KEY (course_ref_id) REFERENCES illias_courses(ref_id) ON DELETE CASCADE
PRIMARY KEY (course_id, key),
FOREIGN KEY (course_id) REFERENCES illias_courses(id) ON DELETE CASCADE
);

CREATE TABLE IF NOT EXISTS illias_alma_matches (
illias_course_ref_id TEXT PRIMARY KEY,
illias_course_id INTEGER PRIMARY KEY,
alma_course_id INTEGER,
confidence REAL NOT NULL,
match_type TEXT NOT NULL,
notes TEXT NOT NULL,
candidate_count INTEGER NOT NULL,
matched_at_unix INTEGER NOT NULL DEFAULT (unixepoch()),
FOREIGN KEY (illias_course_ref_id) REFERENCES illias_courses(ref_id) ON DELETE CASCADE
FOREIGN KEY (illias_course_id) REFERENCES illias_courses(id) ON DELETE CASCADE
);

CREATE INDEX IF NOT EXISTS idx_illias_courses_title ON illias_courses(title);
Expand Down Expand Up @@ -93,14 +94,30 @@ def import_scrape(connection: sqlite3.Connection, payload: dict[str, Any]) -> in
run_id = int(cursor.lastrowid)
for raw_course in payload.get("courses") or []:
course = _course_from_mapping(raw_course)
connection.execute(
row = connection.execute(
"""
INSERT OR REPLACE INTO illias_courses (
INSERT INTO illias_courses (
ref_id, run_id, title, url, object_type, description, availability,
registration, deadline, max_participants, tags_json, instructors_json,
raw_fields_json, raw_text
)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(ref_id) DO UPDATE SET
run_id = excluded.run_id,
title = excluded.title,
url = excluded.url,
object_type = excluded.object_type,
description = excluded.description,
availability = excluded.availability,
registration = excluded.registration,
deadline = excluded.deadline,
max_participants = excluded.max_participants,
tags_json = excluded.tags_json,
instructors_json = excluded.instructors_json,
raw_fields_json = excluded.raw_fields_json,
raw_text = excluded.raw_text,
imported_at_unix = unixepoch()
RETURNING id
""",
(
course.ref_id,
Expand All @@ -118,14 +135,17 @@ def import_scrape(connection: sqlite3.Connection, payload: dict[str, Any]) -> in
json.dumps(course.fields, ensure_ascii=False),
course.raw_text,
),
)
connection.execute("DELETE FROM illias_course_fields WHERE course_ref_id = ?", (course.ref_id,))
).fetchone()
if row is None:
raise RuntimeError(f"Failed to upsert ILIAS course {course.ref_id!r}.")
course_id = int(row["id"])
connection.execute("DELETE FROM illias_course_fields WHERE course_id = ?", (course_id,))
connection.executemany(
"""
INSERT INTO illias_course_fields (course_ref_id, key, value)
INSERT INTO illias_course_fields (course_id, key, value)
VALUES (?, ?, ?)
""",
[(course.ref_id, key, value) for key, value in course.fields.items()],
[(course_id, key, value) for key, value in course.fields.items()],
)
connection.commit()
return run_id
Expand Down Expand Up @@ -154,17 +174,28 @@ def load_illias_courses(connection: sqlite3.Connection) -> list[IliasCourse]:
def save_matches(connection: sqlite3.Connection, matches: list[CourseMatch]) -> None:
initialize_database(connection)
connection.execute("DELETE FROM illias_alma_matches")
ref_id_rows = connection.execute("SELECT id, ref_id FROM illias_courses").fetchall()
course_ids_by_ref_id = {row["ref_id"]: int(row["id"]) for row in ref_id_rows}
missing_ref_ids = sorted(
{
match.illias_course_ref_id
for match in matches
if match.illias_course_ref_id not in course_ids_by_ref_id
}
)
if missing_ref_ids:
raise ValueError(f"Cannot save matches for unknown ILIAS courses: {', '.join(missing_ref_ids)}")
connection.executemany(
"""
INSERT OR REPLACE INTO illias_alma_matches (
illias_course_ref_id, alma_course_id, confidence, match_type,
illias_course_id, alma_course_id, confidence, match_type,
notes, candidate_count
)
VALUES (?, ?, ?, ?, ?, ?)
""",
[
(
match.illias_course_ref_id,
course_ids_by_ref_id[match.illias_course_ref_id],
match.alma_course_id,
match.confidence,
match.match_type,
Expand Down
42 changes: 40 additions & 2 deletions data_collection/tests/test_illias_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,9 @@ def test_save_matches_replaces_stale_matches(self) -> None:
save_matches(connection, [CourseMatch("2", None, 0.0, "unmatched", "No match.", 0)])
rows = connection.execute(
"""
SELECT illias_course_ref_id, alma_course_id, match_type
FROM illias_alma_matches
SELECT ic.ref_id AS illias_course_ref_id, m.alma_course_id, m.match_type
FROM illias_alma_matches AS m
JOIN illias_courses AS ic ON ic.id = m.illias_course_id
"""
).fetchall()
finally:
Expand All @@ -58,6 +59,43 @@ def test_save_matches_replaces_stale_matches(self) -> None:
self.assertIsNone(rows[0]["alma_course_id"])
self.assertEqual(rows[0]["match_type"], "unmatched")

def test_illias_relationships_use_numeric_course_ids(self) -> None:
with tempfile.TemporaryDirectory() as temporary_directory:
database_path = Path(temporary_directory) / "illias.sqlite"
connection = connect(database_path)
try:
import_scrape(
connection,
{
"source": {"start_url": "https://example.test", "fetched_at_unix": 1},
"courses": [
{
"ref_id": "abc_42",
"title": "Current course",
"url": "https://example.test/abc_42",
"fields": {"Availability": "Online"},
}
],
},
)
save_matches(connection, [CourseMatch("abc_42", None, 0.0, "unmatched", "No match.", 0)])
row = connection.execute(
"""
SELECT c.id, c.ref_id, f.course_id, m.illias_course_id
FROM illias_courses AS c
JOIN illias_course_fields AS f ON f.course_id = c.id
JOIN illias_alma_matches AS m ON m.illias_course_id = c.id
"""
).fetchone()
finally:
connection.close()

self.assertIsNotNone(row)
self.assertIsInstance(row["id"], int)
self.assertEqual(row["ref_id"], "abc_42")
self.assertEqual(row["course_id"], row["id"])
self.assertEqual(row["illias_course_id"], row["id"])


if __name__ == "__main__":
unittest.main()
Loading