"""Library for computing the BLEU score based on SacreBLEU SacreBLEU github: https://github.com/mjpost/sacrebleu Authors * Titouan Parcollet 2025 * Mirco Ravanelli 2021 """ from speechbrain.utils.metric_stats import MetricStats class BLEUStats(MetricStats): """A class for tracking corpus-level BLEU (https://www.aclweb.org/anthology/P02-1040.pdf). Each hypothesis can be matched against one or multiple references. Arguments --------- max_ngram_order: int, default 4 The maximum length of the ngrams to use for BLEU scoring. Default is 4. Example ------- >>> bleu = BLEUStats() >>> bleu.append( ... ids=['utterance1', 'utterance2'], ... predict=[ ... 'The dog bit the man.', ... 'It was not surprising.'], ... targets=[ ... ['The dog bit the man.', 'It was not unexpected.'], ... ['The dog had bit the man.', 'No one was surprised.'] ... ] ... ) >>> stats = bleu.summarize() >>> stats['BLEU'] 74.19446627365011 """ def __init__(self, max_ngram_order=4): # Check extra-dependency for computing the bleu score try: from sacrebleu.metrics import BLEU except ImportError: print( "Please install sacrebleu (https://pypi.org/project/sacrebleu/) in order to use the BLEU metric" ) self.clear() self.bleu = BLEU(max_ngram_order=max_ngram_order) self.predicts = [] self.targets = None def append(self, ids, predict, targets): """Add stats to the relevant containers. * See MetricStats.append() Arguments --------- ids : list List of ids corresponding to utterances. predict : list[str] A str which represent the hypotheses. Of dimension [nb_hypotheses] targets : list[list[str]] List of list of reference. The dimensions are as follow: [nb_references, nb_hypotheses]. """ self.ids.extend(ids) self.predicts.extend(predict) if self.targets is None: self.targets = targets else: assert len(self.targets) == len(targets) for i in range(len(self.targets)): self.targets[i].extend(targets[i]) def summarize(self, field=None): """Summarize the BLEU and return relevant statistics. * See MetricStats.summarize() """ scores = self.bleu.corpus_score(self.predicts, self.targets) details = {} details["BLEU"] = scores.score details["BP"] = scores.bp details["ratio"] = scores.sys_len / scores.ref_len details["hyp_len"] = scores.sys_len details["ref_len"] = scores.ref_len details["precisions"] = scores.precisions self.scores = scores self.summary = details # Add additional, more generic key self.summary["bleu_score"] = self.summary["BLEU"] if field is not None: return self.summary[field] else: return self.summary def write_stats(self, filestream): """Write all relevant info (e.g., error rate alignments) to file. * See MetricStats.write_stats() """ if not self.summary: self.summarize() print(self.scores, file=filestream)