Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ retriever = [
# Sparse retrieval (BM25)
"pyserini==0.43.0",

# Sparse retrieval (BM25S - pure Python, no Java dependency)
"bm25s>=0.2.0",

# Dense retrieval
"faiss-cpu==1.9.0.post1",
"h5py==3.12.1",
Expand Down
86 changes: 70 additions & 16 deletions rankify/retrievers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,82 @@
# rankify/retrievers/__init__.py - MODIFIED VERSION
# rankify/retrievers/__init__.py

from .retriever import Retriever
from .base_retriever import BaseRetriever
from .bm25_retriever import BM25Retriever
from .dense_retriever import DenseRetriever
from .ance_retriever import ANCERetriever # NEW IMPORT
from .bge_retriever import BGERetriever
from .colbert_retriever import ColBERTRetriever
from .contriever_retriever import ContrieverRetriever
from .online_retriever import OnlineRetriever
from .hyde_retriever import HydeRetriever
from .diver_dense_retriever import DiverDenseRetriever
from .diver_bm25_retriever import DiverBM25Retriever
from .reasonir_retriever import ReasonIRRetriever
from .reasonembed_retriever import ReasonEmbedRetriever
from .bge_reasoner_retriever import BgeReasonerRetriever
from .bm25s_retriever import BM25SRetriever

try:
from .bm25_retriever import BM25Retriever
except ImportError:
BM25Retriever = None # type: ignore[assignment,misc]

try:
from .dense_retriever import DenseRetriever
except ImportError:
DenseRetriever = None # type: ignore[assignment,misc]

try:
from .ance_retriever import ANCERetriever
except ImportError:
ANCERetriever = None # type: ignore[assignment,misc]

try:
from .bge_retriever import BGERetriever
except ImportError:
BGERetriever = None # type: ignore[assignment,misc]

try:
from .colbert_retriever import ColBERTRetriever
except ImportError:
ColBERTRetriever = None # type: ignore[assignment,misc]

try:
from .contriever_retriever import ContrieverRetriever
except ImportError:
ContrieverRetriever = None # type: ignore[assignment,misc]

try:
from .online_retriever import OnlineRetriever
except ImportError:
OnlineRetriever = None # type: ignore[assignment,misc]

try:
from .hyde_retriever import HydeRetriever
except ImportError:
HydeRetriever = None # type: ignore[assignment,misc]

try:
from .diver_dense_retriever import DiverDenseRetriever
except ImportError:
DiverDenseRetriever = None # type: ignore[assignment,misc]

try:
from .diver_bm25_retriever import DiverBM25Retriever
except ImportError:
DiverBM25Retriever = None # type: ignore[assignment,misc]

try:
from .reasonir_retriever import ReasonIRRetriever
except ImportError:
ReasonIRRetriever = None # type: ignore[assignment,misc]

try:
from .reasonembed_retriever import ReasonEmbedRetriever
except ImportError:
ReasonEmbedRetriever = None # type: ignore[assignment,misc]

try:
from .bge_reasoner_retriever import BgeReasonerRetriever
except ImportError:
BgeReasonerRetriever = None # type: ignore[assignment,misc]


__all__ = [
"Retriever",
"BaseRetriever",
"BaseRetriever",
"BM25SRetriever",
"BM25Retriever",
"DenseRetriever",
"ANCERetriever", # NEW EXPORT
"ANCERetriever",
"BGERetriever",
"ColBERTRetriever",
"ContrieverRetriever",
Expand Down
18 changes: 15 additions & 3 deletions rankify/retrievers/bm25_retriever.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
# bm25_retriever.py
import json
from typing import List
from pyserini.search.lucene import LuceneSearcher
from pyserini.eval.evaluate_dpr_retrieval import has_answers, SimpleTokenizer
from tqdm import tqdm
import os
from .base_retriever import BaseRetriever
from .index_manager import IndexManager
from rankify.dataset.dataset import Document, Context

try:
from pyserini.search.lucene import LuceneSearcher
from pyserini.eval.evaluate_dpr_retrieval import has_answers, SimpleTokenizer
_PYSERINI_AVAILABLE = True
except ImportError:
_PYSERINI_AVAILABLE = False

class BM25Retriever(BaseRetriever):
"""
BM25 retriever implementation using Pyserini's LuceneSearcher.
Expand All @@ -17,6 +22,13 @@ class BM25Retriever(BaseRetriever):
"""

def __init__(self, index_type: str = "wiki", index_folder: str = None, **kwargs):
if not _PYSERINI_AVAILABLE:
raise ImportError(
"pyserini is required for BM25Retriever. "
"Install it with: pip install pyserini "
"Or use BM25SRetriever for a pure-Python alternative: "
"Retriever(method='bm25s', ...)"
)
super().__init__(**kwargs)
self.index_type = index_type
self.index_folder = index_folder
Expand Down Expand Up @@ -48,7 +60,7 @@ def _load_reverse_mapping(self):
fwd = json.load(f) # { "orig_id": 123 }
m = {str(v): k for k, v in fwd.items()} # ensure string keys
return m
def _initialize_searcher(self) -> LuceneSearcher:
def _initialize_searcher(self):
"""Initialize Lucene searcher."""
if self.index_path.startswith("wikipedia-") or "prebuilt" in self.index_path:
return LuceneSearcher.from_prebuilt_index(self.index_path)
Expand Down
Loading