diff --git a/backend/migrations/0024_illias_course_metadata.sql b/backend/migrations/0024_illias_course_metadata.sql index 57542d8..e2185da 100644 --- a/backend/migrations/0024_illias_course_metadata.sql +++ b/backend/migrations/0024_illias_course_metadata.sql @@ -6,7 +6,8 @@ CREATE TABLE IF NOT EXISTS illias_scrape_runs ( ); CREATE TABLE IF NOT EXISTS illias_courses ( - ref_id TEXT PRIMARY KEY, + id INTEGER PRIMARY KEY, + ref_id TEXT NOT NULL UNIQUE, run_id INTEGER NOT NULL, title TEXT NOT NULL, url TEXT NOT NULL, @@ -25,22 +26,22 @@ CREATE TABLE IF NOT EXISTS illias_courses ( ); CREATE TABLE IF NOT EXISTS illias_course_fields ( - course_ref_id TEXT NOT NULL, + course_id INTEGER NOT NULL, key TEXT NOT NULL, value TEXT NOT NULL, - PRIMARY KEY (course_ref_id, key), - FOREIGN KEY (course_ref_id) REFERENCES illias_courses(ref_id) ON DELETE CASCADE + PRIMARY KEY (course_id, key), + FOREIGN KEY (course_id) REFERENCES illias_courses(id) ON DELETE CASCADE ); CREATE TABLE IF NOT EXISTS illias_alma_matches ( - illias_course_ref_id TEXT PRIMARY KEY, + illias_course_id INTEGER PRIMARY KEY, alma_course_id INTEGER, confidence REAL NOT NULL, match_type TEXT NOT NULL, notes TEXT NOT NULL, candidate_count INTEGER NOT NULL, matched_at_unix INTEGER NOT NULL DEFAULT (unixepoch()), - FOREIGN KEY (illias_course_ref_id) REFERENCES illias_courses(ref_id) ON DELETE CASCADE, + FOREIGN KEY (illias_course_id) REFERENCES illias_courses(id) ON DELETE CASCADE, FOREIGN KEY (alma_course_id) REFERENCES courses(id) ON DELETE SET NULL ); @@ -49,4 +50,3 @@ CREATE INDEX IF NOT EXISTS idx_illias_courses_title CREATE INDEX IF NOT EXISTS idx_illias_alma_matches_alma ON illias_alma_matches(alma_course_id); - diff --git a/backend/src/services/course_catalog.py b/backend/src/services/course_catalog.py index 0a76b76..cdd4672 100644 --- a/backend/src/services/course_catalog.py +++ b/backend/src/services/course_catalog.py @@ -917,7 +917,7 @@ async def _load_illias_metadata(env: Any, course_id: int) -> dict[str, Any] | No m.match_type AS matchType, m.notes FROM illias_alma_matches AS m - JOIN illias_courses AS ic ON ic.ref_id = m.illias_course_ref_id + JOIN illias_courses AS ic ON ic.id = m.illias_course_id WHERE m.alma_course_id = ? ORDER BY m.confidence DESC, ic.title ASC LIMIT 1 diff --git a/data_collection/illias/cli.py b/data_collection/illias/cli.py index ca149fd..01c15ae 100644 --- a/data_collection/illias/cli.py +++ b/data_collection/illias/cli.py @@ -136,7 +136,7 @@ def _run_export_sql(args: argparse.Namespace) -> None: ).fetchall() courses = connection.execute( """ - SELECT ref_id, run_id, title, url, object_type, description, + SELECT id, ref_id, run_id, title, url, object_type, description, availability, registration, deadline, max_participants, tags_json, instructors_json, raw_fields_json, raw_text, imported_at_unix @@ -148,22 +148,22 @@ def _run_export_sql(args: argparse.Namespace) -> None: ).fetchall() fields = connection.execute( """ - SELECT f.course_ref_id, f.key, f.value + SELECT f.course_id, f.key, f.value FROM illias_course_fields AS f - JOIN illias_courses AS c ON c.ref_id = f.course_ref_id + JOIN illias_courses AS c ON c.id = f.course_id WHERE c.run_id = ? - ORDER BY f.course_ref_id, f.key + ORDER BY f.course_id, f.key """, (latest_run_id,), ).fetchall() matches = connection.execute( """ - SELECT m.illias_course_ref_id, m.alma_course_id, m.confidence, m.match_type, + SELECT m.illias_course_id, m.alma_course_id, m.confidence, m.match_type, m.notes, m.candidate_count, m.matched_at_unix FROM illias_alma_matches AS m - JOIN illias_courses AS c ON c.ref_id = m.illias_course_ref_id + JOIN illias_courses AS c ON c.id = m.illias_course_id WHERE c.run_id = ? - ORDER BY m.illias_course_ref_id + ORDER BY m.illias_course_id """, (latest_run_id,), ).fetchall() diff --git a/data_collection/illias/db.py b/data_collection/illias/db.py index 3f18439..58bdd3e 100644 --- a/data_collection/illias/db.py +++ b/data_collection/illias/db.py @@ -20,7 +20,8 @@ ); CREATE TABLE IF NOT EXISTS illias_courses ( - ref_id TEXT PRIMARY KEY, + id INTEGER PRIMARY KEY, + ref_id TEXT NOT NULL UNIQUE, run_id INTEGER NOT NULL, title TEXT NOT NULL, url TEXT NOT NULL, @@ -39,22 +40,22 @@ ); CREATE TABLE IF NOT EXISTS illias_course_fields ( - course_ref_id TEXT NOT NULL, + course_id INTEGER NOT NULL, key TEXT NOT NULL, value TEXT NOT NULL, - PRIMARY KEY (course_ref_id, key), - FOREIGN KEY (course_ref_id) REFERENCES illias_courses(ref_id) ON DELETE CASCADE + PRIMARY KEY (course_id, key), + FOREIGN KEY (course_id) REFERENCES illias_courses(id) ON DELETE CASCADE ); CREATE TABLE IF NOT EXISTS illias_alma_matches ( - illias_course_ref_id TEXT PRIMARY KEY, + illias_course_id INTEGER PRIMARY KEY, alma_course_id INTEGER, confidence REAL NOT NULL, match_type TEXT NOT NULL, notes TEXT NOT NULL, candidate_count INTEGER NOT NULL, matched_at_unix INTEGER NOT NULL DEFAULT (unixepoch()), - FOREIGN KEY (illias_course_ref_id) REFERENCES illias_courses(ref_id) ON DELETE CASCADE + FOREIGN KEY (illias_course_id) REFERENCES illias_courses(id) ON DELETE CASCADE ); CREATE INDEX IF NOT EXISTS idx_illias_courses_title ON illias_courses(title); @@ -93,14 +94,30 @@ def import_scrape(connection: sqlite3.Connection, payload: dict[str, Any]) -> in run_id = int(cursor.lastrowid) for raw_course in payload.get("courses") or []: course = _course_from_mapping(raw_course) - connection.execute( + row = connection.execute( """ - INSERT OR REPLACE INTO illias_courses ( + INSERT INTO illias_courses ( ref_id, run_id, title, url, object_type, description, availability, registration, deadline, max_participants, tags_json, instructors_json, raw_fields_json, raw_text ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(ref_id) DO UPDATE SET + run_id = excluded.run_id, + title = excluded.title, + url = excluded.url, + object_type = excluded.object_type, + description = excluded.description, + availability = excluded.availability, + registration = excluded.registration, + deadline = excluded.deadline, + max_participants = excluded.max_participants, + tags_json = excluded.tags_json, + instructors_json = excluded.instructors_json, + raw_fields_json = excluded.raw_fields_json, + raw_text = excluded.raw_text, + imported_at_unix = unixepoch() + RETURNING id """, ( course.ref_id, @@ -118,14 +135,17 @@ def import_scrape(connection: sqlite3.Connection, payload: dict[str, Any]) -> in json.dumps(course.fields, ensure_ascii=False), course.raw_text, ), - ) - connection.execute("DELETE FROM illias_course_fields WHERE course_ref_id = ?", (course.ref_id,)) + ).fetchone() + if row is None: + raise RuntimeError(f"Failed to upsert ILIAS course {course.ref_id!r}.") + course_id = int(row["id"]) + connection.execute("DELETE FROM illias_course_fields WHERE course_id = ?", (course_id,)) connection.executemany( """ - INSERT INTO illias_course_fields (course_ref_id, key, value) + INSERT INTO illias_course_fields (course_id, key, value) VALUES (?, ?, ?) """, - [(course.ref_id, key, value) for key, value in course.fields.items()], + [(course_id, key, value) for key, value in course.fields.items()], ) connection.commit() return run_id @@ -154,17 +174,28 @@ def load_illias_courses(connection: sqlite3.Connection) -> list[IliasCourse]: def save_matches(connection: sqlite3.Connection, matches: list[CourseMatch]) -> None: initialize_database(connection) connection.execute("DELETE FROM illias_alma_matches") + ref_id_rows = connection.execute("SELECT id, ref_id FROM illias_courses").fetchall() + course_ids_by_ref_id = {row["ref_id"]: int(row["id"]) for row in ref_id_rows} + missing_ref_ids = sorted( + { + match.illias_course_ref_id + for match in matches + if match.illias_course_ref_id not in course_ids_by_ref_id + } + ) + if missing_ref_ids: + raise ValueError(f"Cannot save matches for unknown ILIAS courses: {', '.join(missing_ref_ids)}") connection.executemany( """ INSERT OR REPLACE INTO illias_alma_matches ( - illias_course_ref_id, alma_course_id, confidence, match_type, + illias_course_id, alma_course_id, confidence, match_type, notes, candidate_count ) VALUES (?, ?, ?, ?, ?, ?) """, [ ( - match.illias_course_ref_id, + course_ids_by_ref_id[match.illias_course_ref_id], match.alma_course_id, match.confidence, match.match_type, diff --git a/data_collection/tests/test_illias_db.py b/data_collection/tests/test_illias_db.py index b86a664..5925a79 100644 --- a/data_collection/tests/test_illias_db.py +++ b/data_collection/tests/test_illias_db.py @@ -46,8 +46,9 @@ def test_save_matches_replaces_stale_matches(self) -> None: save_matches(connection, [CourseMatch("2", None, 0.0, "unmatched", "No match.", 0)]) rows = connection.execute( """ - SELECT illias_course_ref_id, alma_course_id, match_type - FROM illias_alma_matches + SELECT ic.ref_id AS illias_course_ref_id, m.alma_course_id, m.match_type + FROM illias_alma_matches AS m + JOIN illias_courses AS ic ON ic.id = m.illias_course_id """ ).fetchall() finally: @@ -58,6 +59,43 @@ def test_save_matches_replaces_stale_matches(self) -> None: self.assertIsNone(rows[0]["alma_course_id"]) self.assertEqual(rows[0]["match_type"], "unmatched") + def test_illias_relationships_use_numeric_course_ids(self) -> None: + with tempfile.TemporaryDirectory() as temporary_directory: + database_path = Path(temporary_directory) / "illias.sqlite" + connection = connect(database_path) + try: + import_scrape( + connection, + { + "source": {"start_url": "https://example.test", "fetched_at_unix": 1}, + "courses": [ + { + "ref_id": "abc_42", + "title": "Current course", + "url": "https://example.test/abc_42", + "fields": {"Availability": "Online"}, + } + ], + }, + ) + save_matches(connection, [CourseMatch("abc_42", None, 0.0, "unmatched", "No match.", 0)]) + row = connection.execute( + """ + SELECT c.id, c.ref_id, f.course_id, m.illias_course_id + FROM illias_courses AS c + JOIN illias_course_fields AS f ON f.course_id = c.id + JOIN illias_alma_matches AS m ON m.illias_course_id = c.id + """ + ).fetchone() + finally: + connection.close() + + self.assertIsNotNone(row) + self.assertIsInstance(row["id"], int) + self.assertEqual(row["ref_id"], "abc_42") + self.assertEqual(row["course_id"], row["id"]) + self.assertEqual(row["illias_course_id"], row["id"]) + if __name__ == "__main__": unittest.main()