import io
import os
from pathlib import Path
from typing import Optional, Union, Dict
import shutil
from experimaestro import Config, Param, field
from xpmir.neural.dual import DotDense
from xpmir.neural.huggingface import HFCrossScorer
from xpm_torch import ModuleLoader
from xpm_torch.actions import ExportAction
from xpm_torch.huggingface import TorchHFHub
from xpm_torch.module import ReadmeSection
import logging
logger = logging.getLogger(__name__)
def get_class(name: str):
    """Resolve a ``"module.path:attribute"`` string to the named attribute.

    :param name: import path and attribute name separated by exactly one
        colon, e.g. ``"collections:OrderedDict"``
    :returns: the attribute looked up on the imported module
    :raises ValueError: if ``name`` does not split into exactly two parts
        around ``":"``
    """
    from importlib import import_module

    module_path, attribute = name.split(":")
    return getattr(import_module(module_path), attribute)
class XPMIRHFHub(TorchHFHub):
    """HF Hub integration for xpmir models.

    Extends :class:`~xpm_torch.huggingface.TorchHFHub` with xpmir-specific
    README sections (frontmatter, description, usage, results) and
    TensorBoard log copying.
    """

    def __init__(
        self,
        config: Config,
        *,
        doc: Optional[str] = None,
        bibtex: Optional[str] = None,
        model_id: Optional[str] = None,
        evaluations=None,
        model_key: Optional[str] = None,
        tb_logs: Optional[Dict[str, Path]] = None,
    ):
        """Store the optional README inputs next to the wrapped config.

        :param config: forwarded unchanged to the parent constructor
        :param doc: description text; a "description" README section is
            emitted when it is truthy
        :param bibtex: BibTeX entry; a "citation" README section is emitted
            when it is truthy
        :param model_id: identifier interpolated into the usage snippets; a
            "usage" README section is emitted when it is truthy
        :param evaluations: object providing
            ``output_model_results(model_key, file=...)``; used together
            with ``model_key`` to build the "results" section
        :param model_key: key passed to ``evaluations.output_model_results``
        :param tb_logs: maps a sub-directory name (created under ``runs/``)
            to the directory that is copied there on save
        """
        super().__init__(config)
        self.doc = doc
        self.bibtex = bibtex
        self.model_id = model_id
        self.evaluations = evaluations
        self.model_key = model_key
        self.tb_logs = tb_logs

    def _xpmir_usage_section(self) -> str:
        """Return the Markdown "Using the model" section.

        ``self.model_id`` is interpolated into both code snippets, so this is
        only meaningful when a model id has been set.
        """
        return (
            "## Using the model\n\n"
            "The model can be loaded with [experimaestro "
            "IR](https://experimaestro-ir.readthedocs.io/en/latest/)\n\n"
            "To use in further experiments with XPMIR, load the model loader:\n"
            "```py\n"
            "from xpmir.models import AutoModel\n\n"
            f'loader = AutoModel.load_from_hf_hub("{self.model_id}")\n'
            "# loader.model is the model config\n"
            "# pass loader as an init task to load the weights\n"
            "```\n\n"
            "For direct inference:\n\n"
            "```py\n"
            "from xpmir.models import AutoModel\n\n"
            f'model = AutoModel.load_from_hf_hub("{self.model_id}", as_instance=True)\n'
            'model.rsv("walgreens store sales average", '
            '"The average Walgreens salary ranges...")\n'
            "```"
        )

    def _results_section(self) -> str:
        """Return the Markdown "Results" section.

        Writes the section header, then lets ``self.evaluations`` write the
        results for ``self.model_key`` into the same in-memory buffer.
        """
        out = io.StringIO()
        out.write("## Results\n\n")
        self.evaluations.output_model_results(self.model_key, file=out)
        return out.getvalue()

    def _readme_base_sections(self):
        """Build the list of README sections for this model.

        The frontmatter section is always present. The description, usage,
        results and citation sections are appended, in that order, only when
        the attributes they depend on are set.

        :returns: a list of :class:`ReadmeSection`
        """
        sections = [
            ReadmeSection("frontmatter", "---\nlibrary_name: xpmir\n---\n"),
        ]
        if self.doc:
            sections.append(ReadmeSection("description", f"{self.doc}\n"))
        if self.model_id:
            sections.append(ReadmeSection("usage", self._xpmir_usage_section()))
        # Results need both the evaluations object and the key to look up
        if self.evaluations and self.model_key:
            sections.append(ReadmeSection("results", self._results_section()))
        if self.bibtex:
            sections.append(
                ReadmeSection(
                    "citation",
                    f"## Citation\n\n```bibtex\n{self.bibtex}\n```",
                )
            )
        return sections

    def _save_pretrained(self, save_directory: Union[str, Path]):
        """Run the parent save, then copy the TensorBoard logs under ``runs/``.

        :param save_directory: target directory (string or path)

        Each ``tb_logs`` entry is copied to ``<save_directory>/runs/<key>``.
        Nothing is created when ``tb_logs`` is empty or ``None``.
        """
        save_directory = Path(save_directory)
        super()._save_pretrained(save_directory)
        if self.tb_logs:
            runs_dir = save_directory / "runs"
            # Tolerate a pre-existing runs/ folder (e.g. saving twice into the
            # same directory) instead of failing with FileExistsError.
            runs_dir.mkdir(parents=True, exist_ok=True)
            for key, path in self.tb_logs.items():
                shutil.copytree(path, runs_dir / key, dirs_exist_ok=True)
# Export action for xpmir models (README sections come from XPMIRHFHub).
class XPMIRExportAction(ExportAction):
    """Export action whose hub object is an XPMIRHFHub, so that exported
    models get the xpmir-specific README sections."""

    doc: Param[str] = field(default="", ignore_default=True)
    """Paper description or title"""

    bibtex: Param[str] = field(default="", ignore_default=True)
    """BibTeX citation"""

    def get_hub(self):
        """Return an XPMIRHFHub wrapping ``self.loader``.

        Empty ``doc`` / ``bibtex`` values are passed on as ``None``.
        """
        loader = self.loader
        description = self.doc if self.doc else None
        citation = self.bibtex if self.bibtex else None
        return XPMIRHFHub(loader, doc=description, bibtex=citation)
class AutoModel:
    """Static helpers to load xpmir models from, or push them to, the
    HuggingFace Hub."""

    @staticmethod
    def load_from_hf_hub(
        hf_id_or_folder: str,
        as_instance: bool = False,
    ):
        """Loads a model from HuggingFace Hub or from a local folder.

        Returns a :class:`~xpm_torch.module.ModuleLoader`. Use
        ``loader.model`` to access the model config, and ``loader`` itself
        as an init task.

        If ``as_instance=True``, executes the loader and returns the
        ready-to-use model instance directly.

        :param hf_id_or_folder: HuggingFace Hub identifier or local folder
        :param as_instance: when true, execute the loader and return
            ``loader.model`` instead of the loader
        :raises TypeError: if the loaded object is not a ``ModuleLoader``
        """
        # The environment gives a string: "0" or "false" must not be read as
        # "offline". Accept the same true values as huggingface_hub does.
        offline_flag = os.environ.get("HF_HUB_OFFLINE", "")
        local_files_only = offline_flag.strip().upper() in {
            "1",
            "ON",
            "YES",
            "TRUE",
        }

        loader = XPMIRHFHub.from_pretrained(
            hf_id_or_folder,
            local_files_only=local_files_only,
        )
        if not isinstance(loader, ModuleLoader):
            raise TypeError(f"Expected ModuleLoader, got {type(loader)}")

        if as_instance:
            loader.execute()
            return loader.model
        return loader

    @staticmethod
    def push_to_hf_hub(config: Config, *args, **kwargs):
        """Push to HuggingFace Hub

        See ModelHubMixin.push_to_hub for the other arguments

        :param config: configuration wrapped in an :class:`XPMIRHFHub`
        :returns: whatever ``push_to_hub`` returns
        """
        return XPMIRHFHub(config).push_to_hub(*args, **kwargs)

    @staticmethod
    def sentence_scorer(hf_id: str):
        """Loads from hugging face hub using a sentence transformer

        :param hf_id: identifier passed to ``SentenceTransformer``
        :returns: a ``DotDense`` built around the sentence-transformer encoder
        :raises ImportError: if ``sentence_transformers`` cannot be imported
        """
        try:
            from sentence_transformers import SentenceTransformer
        except ImportError:
            # Log the install hint, then let the original error propagate
            logger.error(
                "Sentence transformer is not installed: "
                "pip install -U sentence_transformers"
            )
            raise

        encoder = SentenceTransformer(hf_id)
        return DotDense(encoder=encoder)

    @staticmethod
    def cross_encoder_model(hf_id: str, max_length: int = 512):
        """Builds a cross-scorer from a HuggingFace Hub identifier.

        :param hf_id: HuggingFace Hub identifier given to the scorer
        :param max_length: ``max_length`` value given to the scorer
        :returns: an ``HFCrossScorer`` configured with these two values
        """
        scorer = HFCrossScorer(hf_id=hf_id, max_length=max_length)
        return scorer