Source code for xpmir.text.adapters

from typing import List

from experimaestro import Param
from datamaestro_ir.data import TextRecord
from xpmir.utils.convert import Converter

from .encoders import InputType, RepresentationOutput, TokenizedTextEncoderBase


class MeanTextEncoder(TokenizedTextEncoderBase[InputType, RepresentationOutput]):
    """Text encoder that averages the output of a wrapped token-level encoder

    Every other part of the interface (initialization, static flag and
    dimension) is delegated unchanged to the wrapped encoder.
    """

    # The wrapped encoder whose output is averaged
    encoder: Param[TokenizedTextEncoderBase[InputType, RepresentationOutput]]

    def __initialize__(self):
        """Initializes the wrapped encoder"""
        self.encoder.__initialize__()

    def static(self):
        """Returns whatever the wrapped encoder reports for ``static()``"""
        return self.encoder.static()

    @property
    def dimension(self):
        """The dimension reported by the wrapped encoder"""
        return self.encoder.dimension

    def forward(self, texts: List[InputType], options=None) -> RepresentationOutput:
        """Encodes the texts and averages the result over axis 1

        :param texts: the inputs, passed as is to the wrapped encoder
        :param options: passed as is to the wrapped encoder
        :return: the object returned by the wrapped encoder, whose ``value``
            has been replaced in place by its mean over axis 1
        """
        output = self.encoder(texts, options=options)

        # Axis 1 is taken to be the time dimension (the wrapped encoder is
        # expected to output batch x time x dim). Every position is averaged.
        # NOTE(review): no mask is applied here -- confirm whether padded
        # positions can be present in the wrapped encoder's output.
        pooled = output.value.mean(1)
        output.value = pooled
        return output
class TopicTextConverter(Converter[TextRecord, str]):
    """Converts a topic record into its text"""

    def __call__(self, input: TextRecord) -> str:
        """Returns the ``text`` attribute of the record's ``"text_item"`` entry

        :param input: the topic record, indexed with the ``"text_item"`` key
        :return: the text held by that entry
        """
        text_item = input["text_item"]
        return text_item.text