# Source code for xpmir.experiments.ir

from typing import Any, List, Optional, Dict
from pathlib import Path
import logging
import pandas as pd
import click
from functools import cached_property
import docstring_parser

from experimaestro import RunMode, Config
from experimaestro.exceptions import HandledException
from xpm_torch.experiments import TensorboardService

from xpmir.evaluation import EvaluationsCollection
from xpmir.models import XPMIRHFHub
from xpmir.papers.results import PaperResults
from xpmir.experiments.learning import LearningExperimentHelper


class UploadToHub:
    """Pushes a single scorer to the HuggingFace Hub

    Stores the target model identifier and the documentation that are
    forwarded to ``XPMIRHFHub`` when :meth:`send_scorer` is called.
    """

    def __init__(self, model_id: Optional[str], doc):
        self.doc = doc
        self.model_id = model_id

    def send_scorer(
        self,
        models: Dict[str, Config],
        *,
        evaluations: Optional[EvaluationsCollection] = None,
        tb_logs: Dict[str, Path],
    ):
        """Upload the scorer(s) to the HuggingFace Hub

        Nothing happens when no model identifier was given.

        :param models: maps a variant key to its model; exactly one entry is
            expected
        :param evaluations: forwarded as-is to ``XPMIRHFHub``
        :param tb_logs: forwarded as-is to ``XPMIRHFHub``
        """
        repo_id = self.model_id
        if repo_id is not None:
            assert len(models) == 1, "Cannot deal with more than one variant"
            # Single-element pattern: binds the only (key, model) pair
            [(variant_key, variant_model)] = models.items()

            logging.info("Uploading to HuggingFace Hub")
            hub = XPMIRHFHub(
                variant_model,
                doc=str(self.doc),
                model_id=repo_id,
                model_key=variant_key,
                evaluations=evaluations,
                tb_logs=tb_logs,
            )
            hub.push_to_hub(repo_id=repo_id)


class IRExperimentHelper(LearningExperimentHelper):
    """Helper for IR experiments"""

    def run(self, extra_args: List[str], configuration: Any):
        """Run the experiment through a click command

        The experiment callable is executed and the helper then waits on the
        experiment (``self.xp.wait()``). When the callable returned a
        :class:`PaperResults` and the run mode is ``RunMode.NORMAL``:

        - if ``--upload-to-hub`` was given, the models are sent to the
          HuggingFace Hub through :class:`UploadToHub`;
        - the evaluation results are printed and written to ``results.csv``
          in the experiment results directory.

        :param extra_args: arguments handed to the click command
        :param configuration: passed to the experiment callable; its ``title``
            and ``description`` are read to build the uploaded documentation
        :raises HandledException: when the experiment callable raises
        :return: whatever invoking the click command returns
        """

        @click.option("--upload-to-hub", type=str)
        @click.command()
        def cli(upload_to_hub: Optional[str]):
            try:
                results = self.callable(self, configuration)
            except Exception as e:
                logging.exception("Error while running the experiment")
                # Keep the original error as the explicit cause
                raise HandledException(e) from e

            self.xp.wait()

            if isinstance(results, PaperResults) and self.xp.run_mode == RunMode.NORMAL:
                if upload_to_hub is not None:
                    if configuration.title == "" and configuration.description == "":
                        # NOTE(review): this is a parsed docstring object, not
                        # text; UploadToHub applies str() to it -- confirm the
                        # result is the intended documentation.
                        doc = docstring_parser.parse(self.callable.__doc__)
                    else:
                        doc = f"# {configuration.title}\n{configuration.description}"

                    upload = UploadToHub(upload_to_hub, doc)
                    upload.send_scorer(
                        results.models,
                        evaluations=results.evaluations,
                        tb_logs=results.tb_logs,
                    )

                # Print the results (without truncating rows or columns)
                df = results.evaluations.to_dataframe()
                pd.set_option("display.max_columns", None)
                pd.set_option("display.max_rows", None)
                pd.set_option("display.width", 200)
                print(df)  # noqa: T201

                # And save them
                results_dir = self.xp.resultspath
                results_dir.mkdir(parents=True, exist_ok=True)
                csv_path = results_dir / "results.csv"

                # pandas expects text handles to be opened with newline="" so
                # that the line terminators it writes are not translated again
                with csv_path.open("wt", newline="") as fp:
                    df.to_csv(fp, index=False)

                # Only report success once the file has been written
                logging.info("Saved results in %s", csv_path.absolute())

        return cli(extra_args, standalone_mode=False)

    @cached_property
    def tensorboard_service(self):
        """Tensorboard service added to the experiment (cached after first use)

        The service is built on the ``runs`` entry of the results directory.
        """
        return self.xp.add_service(TensorboardService(self.xp.resultspath / "runs"))
# Module-level alias of ``IRExperimentHelper.decorator``; the string that
# follows the assignment is its attribute documentation.
ir_experiment = IRExperimentHelper.decorator
"""Uses an IR experiment helper that provides

1. Tensorboard service (from Learning)
1. Upload to HuggingFace
1. Printing the evaluation results
"""