"""Dictionary utilities, e.g. synonym dictionaries.
Authors
* Sylvain de Langen 2024"""
import json
from collections import defaultdict
from typing import Iterable
class SynonymDictionary:
    """Loads sets of synonym words and lets you look up if two words are
    synonyms.

    This could, for instance, be used to check for equality in the case of two
    spellings of the same word when normalization might be unsuitable.

    Synonyms are not considered to be transitive:
    If A is a synonym of B and B is a synonym of C, then A is NOT considered a
    synonym of C unless they are added in the same synonym set."""

    def __init__(self):
        # Maps each known word to the set of its direct synonyms (the word
        # itself is never part of its own set).
        self.word_map = defaultdict(set)

    @staticmethod
    def from_json_file(file) -> "SynonymDictionary":
        """Parses an opened file as JSON, where the top level structure is a
        list of sets of synonyms (i.e. words that are all synonyms with each
        other), e.g. `[ ["hello", "hi"], ["say", "speak", "talk"] ]`.

        Arguments
        ---------
        file : file object
            File object that supports reading (e.g. an `open`ed file)

        Returns
        -------
        SynonymDictionary
            Synonym dictionary from the parsed JSON file with all synonym sets
            added.

        Raises
        ------
        ValueError
            If one of the top-level entries is not a JSON list.
        """
        d = json.load(file)

        synonym_dict = SynonymDictionary()

        for entry in d:
            if isinstance(entry, list):
                synonym_dict.add_synonym_set(entry)
            else:
                raise ValueError(
                    f"Unexpected entry type {type(entry)} in synonyms JSON (expected list)"
                )

        return synonym_dict

    @staticmethod
    def from_json_path(path) -> "SynonymDictionary":
        """Opens a file and parses it as JSON, with otherwise the same semantics
        as :meth:`~SynonymDictionary.from_json_file`, which uses an opened file.

        Arguments
        ---------
        path : str
            Path to the JSON file

        Returns
        -------
        SynonymDictionary
            Synonym dictionary from the parsed JSON file with all synonym sets
            added.
        """
        with open(path, "r", encoding="utf8") as f:
            return SynonymDictionary.from_json_file(f)

    def add_synonym_set(self, words: Iterable[str]) -> None:
        """Add a set of words that are all synonyms with each other.

        Arguments
        ---------
        words : Iterable[str]
            List of words that should be defined as synonyms to each other"""
        word_set = set(words)

        for word in word_set:
            # A word is not stored as a synonym of itself; equality is
            # handled separately at lookup time.
            self.word_map[word].update(word_set - {word})

    def __call__(self, a: str, b: str) -> bool:
        """Check for the equality or synonym equality of two words.

        Arguments
        ---------
        a : str
            First word to compare. May be outside of the known dictionary.
        b : str
            Second word to compare. May be outside of the known dictionary.
            The order of arguments does not matter.

        Returns
        -------
        bool
            Whether `a` and `b` should be considered synonyms. Not transitive,
            see the main class documentation."""
        # Use `.get` rather than indexing: indexing the defaultdict would
        # insert an empty entry for every unknown word that is looked up,
        # mutating (and growing) the dictionary on a read-only query.
        return (a == b) or (b in self.word_map.get(a, ()))

    def get_synonyms_for(self, word: str) -> set:
        """Returns the set of synonyms for a given word.

        Arguments
        ---------
        word : str
            The word to look up the synonyms of. May be outside of the known
            dictionary.

        Returns
        -------
        set of str
            Set of known synonyms for this word. Do not mutate (or copy it
            prior). May be empty if the word has no known synonyms."""
        # `.get` does not trigger the defaultdict factory, so unknown words
        # are not added to the map.
        return self.word_map.get(word, set())