Skip to content

Ontology Taxonomy

Extracting the taxonomy from an ontology often comes in handy for graph-based machine learning techniques. Here we provide a basic Taxonomy class built upon networkx.DiGraph where nodes represent entities and edges represent subsumptions. We then provide the OntologyTaxonomy class that extends the basic Taxonomy. It utilises the simple structural reasoner to enrich the ontology subsumptions beyond asserted ones, and build the taxonomy over the expanded subsumptions. Each node represents a named class and has a label (rdfs:label) attribute. The root node owl:Thing is also specified for functions like counting the node depths, etc. Moreover, we provide the WordnetTaxonomy class that wraps the WordNet knowledge graph for easier access.

Note

It is also possible to use OntologyProjector to extract triples from the ontology as edges of the taxonomy. We will consider this feature in the future.

Taxonomy(edges, root_node=None)

Class for building the taxonomy over structured data.

Attributes:

Name Type Description
nodes list

A list of entity ids.

edges list

A list of (parent, child) pairs.

graph networkx.DiGraph

A directed graph that represents the taxonomy.

root_node Optional[str]

Optional root node id. Defaults to None.

Source code in src/deeponto/onto/taxonomy.py
43
44
45
46
47
def __init__(self, edges: list, root_node: Optional[str] = None):
    """Build the taxonomy graph from the given edges.

    Args:
        edges (list): The edges used to construct the `networkx.DiGraph`.
        root_node (Optional[str]): Optional root node id. Defaults to `None`.
    """
    self.root_node = root_node
    self.edges = edges
    # the node list is read back from the directed graph built over the edges
    taxonomy_graph = nx.DiGraph(edges)
    self.graph = taxonomy_graph
    self.nodes = list(taxonomy_graph.nodes)

get_node_attributes(entity_id)

Get the attributes of the given entity.

Source code in src/deeponto/onto/taxonomy.py
49
50
51
def get_node_attributes(self, entity_id: str):
    """Look up the attributes stored on the graph node of the given entity."""
    node_view = self.graph.nodes
    return node_view[entity_id]

get_children(entity_id, apply_transitivity=False)

Get the set of children for a given entity.

Source code in src/deeponto/onto/taxonomy.py
53
54
55
56
57
58
def get_children(self, entity_id: str, apply_transitivity: bool = False):
    r"""Collect the children of a given entity as a set.

    Args:
        entity_id (str): The id of the entity whose children are requested.
        apply_transitivity (bool): If `False`, only the direct successors in the graph
            are returned; if `True`, the successors found by a depth-first traversal
            starting at the entity are returned. Defaults to `False`.
    """
    if apply_transitivity:
        # flatten the per-node successor lists produced by the depth-first traversal
        dfs_map = nx.dfs_successors(self.graph, entity_id)
        return set(itertools.chain.from_iterable(dfs_map.values()))
    return set(self.graph.successors(entity_id))

get_parents(entity_id, apply_transitivity=False)

Get the set of parents for a given entity.

Source code in src/deeponto/onto/taxonomy.py
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
def get_parents(self, entity_id: str, apply_transitivity: bool = False):
    r"""Get the set of parents for a given entity.

    Args:
        entity_id (str): The id of the entity whose parents are requested.
        apply_transitivity (bool): If `False`, return only the direct predecessors of
            the entity in the graph; if `True`, return every node from which the
            entity can be reached. Defaults to `False`.

    Returns:
        (set): The direct or transitive parents. With transitivity applied on a
            cyclic graph, the entity itself is included if it lies on a cycle.
    """
    if not apply_transitivity:
        return set(self.graph.predecessors(entity_id))

    # NOTE: the nx.dfs_predecessors does not give desirable results, hence the manual traversal.
    # Each node is expanded at most once, so the walk terminates even on cyclic graphs
    # and shared ancestors are not re-expanded.
    ancestors = set()
    frontier = list(self.graph.predecessors(entity_id))
    while frontier:
        candidate = frontier.pop()
        if candidate in ancestors:
            continue
        ancestors.add(candidate)
        frontier.extend(self.graph.predecessors(candidate))
    return ancestors

get_descendant_graph(entity_id)

Create a descendant graph (networkx.DiGraph) for a given entity.

Source code in src/deeponto/onto/taxonomy.py
76
77
78
79
def get_descendant_graph(self, entity_id: str):
    r"""Return the subgraph of the taxonomy graph restricted to the descendants of an entity.

    Only the nodes returned by `get_children(entity_id, apply_transitivity=True)`
    are passed to `subgraph`.
    """
    descendant_ids = list(self.get_children(entity_id, apply_transitivity=True))
    return self.graph.subgraph(descendant_ids)

get_shortest_node_depth(entity_id)

Get the shortest depth of the given entity in the taxonomy.

Source code in src/deeponto/onto/taxonomy.py
81
82
83
84
85
def get_shortest_node_depth(self, entity_id: str):
    """Return the shortest path length from the root node to the given entity.

    Raises:
        RuntimeError: If the taxonomy has no root node.
    """
    if self.root_node:
        return nx.shortest_path_length(self.graph, self.root_node, entity_id)
    raise RuntimeError("No root node specified.")

get_longest_node_depth(entity_id)

Get the longest depth of the given entity in the taxonomy.

Source code in src/deeponto/onto/taxonomy.py
87
88
89
90
91
def get_longest_node_depth(self, entity_id: str):
    """Return the longest depth of the given entity in the taxonomy.

    The depth is the maximum `len` over the simple paths from the root node to
    the entity, as enumerated by `nx.all_simple_paths`.

    Raises:
        RuntimeError: If the taxonomy has no root node.
    """
    if self.root_node:
        root_to_entity_paths = nx.all_simple_paths(self.graph, self.root_node, entity_id)
        return max([len(path) for path in root_to_entity_paths])
    raise RuntimeError("No root node specified.")

get_lowest_common_ancestor(entity_id1, entity_id2)

Get the lowest common ancestor of the given two entities.

Source code in src/deeponto/onto/taxonomy.py
93
94
95
def get_lowest_common_ancestor(self, entity_id1: str, entity_id2: str):
    """Return the lowest common ancestor of two entities, as computed by `nx.lowest_common_ancestor`."""
    common_ancestor = nx.lowest_common_ancestor(self.graph, entity_id1, entity_id2)
    return common_ancestor

OntologyTaxonomy(onto, reasoner_type='struct')

Bases: Taxonomy

Class for building the taxonomy (top-down subsumption graph) from an ontology.

The nodes of this graph are named classes only, but the hierarchy is enriched (beyond asserted axioms) by an ontology reasoner.

Attributes:

Name Type Description
onto Ontology

The input ontology to build the taxonomy.

reasoner_type str

The type of reasoner used. Defaults to "struct". Options are ["hermit", "elk", "struct"].

reasoner OntologyReasoner

An ontology reasoner used for completing the hierarchy. If the reasoner_type is the same as onto.reasoner_type, then re-use onto.reasoner; otherwise, create a new one.

root_node str

The root node that represents owl:Thing.

nodes list

A list of named class IRIs.

edges list

A list of (parent, child) class pairs. That is, if \(C \sqsubseteq D\), then \((D, C)\) will be added as an edge.

graph networkx.DiGraph

A directed subsumption graph.

Source code in src/deeponto/onto/taxonomy.py
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
def __init__(self, onto: Ontology, reasoner_type: str = "struct"):
    """Build the class subsumption taxonomy of an ontology.

    Args:
        onto (Ontology): The input ontology to build the taxonomy.
        reasoner_type (str): The type of reasoner used for completing the hierarchy.
            Defaults to `"struct"`.
    """
    self.onto = onto
    self.reasoner_type = reasoner_type
    # the reasoner completes the hierarchy; share the ontology's own reasoner when the
    # requested type matches, otherwise build a dedicated one
    if reasoner_type == self.onto.reasoner_type:
        self.reasoner = self.onto.reasoner
    else:
        self.reasoner = OntologyReasoner(self.onto, reasoner_type)

    thing = "owl:Thing"
    parent_child_edges = []
    for class_iri, owl_class in self.onto.owl_classes.items():
        # NOTE: unlike self.onto.get_asserted_parents, this call conducts simple reasoning
        direct_parents = self.reasoner.get_inferred_super_entities(owl_class, direct=True)
        if not direct_parents:
            # a class without any parent is attached to the root node
            direct_parents.append(thing)
        parent_child_edges.extend((parent, class_iri) for parent in direct_parents)
    super().__init__(edges=parent_child_edges, root_node=thing)

    # annotate every node with a label (rdfs:label); the root node gets a fixed one
    for node_iri in self.nodes:
        attributes = self.graph.nodes[node_iri]
        if node_iri != self.root_node:
            attributes["label"] = self.onto.get_annotations(self.onto.get_owl_object(node_iri), RDFS_LABEL)
        else:
            attributes["label"] = "Thing"

get_parents(class_iri, apply_transitivity=False)

Get the set of parents for a given class.

It is worth noting that this method with transitivity applied can be deemed as simple structural reasoning. For more advanced logical reasoning, use the DL reasoner self.onto.reasoner instead.

Source code in src/deeponto/onto/taxonomy.py
144
145
146
147
148
149
150
def get_parents(self, class_iri: str, apply_transitivity: bool = False):
    r"""Return the set of parent classes of a given class.

    With `apply_transitivity=True`, the result can be deemed as simple structural reasoning
    over the taxonomy. For more advanced logical reasoning, use the DL reasoner
    `self.onto.reasoner` instead.
    """
    parent_classes = super().get_parents(class_iri, apply_transitivity)
    return parent_classes

get_children(class_iri, apply_transitivity=False)

Get the set of children for a given class.

It is worth noting that this method with transitivity applied can be deemed as simple structural reasoning. For more advanced logical reasoning, use the DL reasoner self.onto.reasoner instead.

Source code in src/deeponto/onto/taxonomy.py
152
153
154
155
156
157
158
def get_children(self, class_iri: str, apply_transitivity: bool = False):
    r"""Return the set of child classes of a given class.

    With `apply_transitivity=True`, the result can be deemed as simple structural reasoning
    over the taxonomy. For more advanced logical reasoning, use the DL reasoner
    `self.onto.reasoner` instead.
    """
    child_classes = super().get_children(class_iri, apply_transitivity)
    return child_classes

get_descendant_graph(class_iri)

Create a descendant graph (networkx.DiGraph) for a given ontology class.

Source code in src/deeponto/onto/taxonomy.py
160
161
162
def get_descendant_graph(self, class_iri: str):
    r"""Create a descendant graph (`networkx.DiGraph`) for a given ontology class.

    Args:
        class_iri (str): The IRI of the class whose descendant graph is requested.

    Returns:
        The graph produced by the parent implementation of `get_descendant_graph`.
    """
    # the result of the parent implementation must be returned explicitly;
    # without the `return` this method always evaluates to `None`
    return super().get_descendant_graph(class_iri)

get_shortest_node_depth(class_iri)

Get the shortest depth of the given named class in the taxonomy.

Source code in src/deeponto/onto/taxonomy.py
164
165
166
def get_shortest_node_depth(self, class_iri: str):
    """Return the shortest path length from the root node to the given named class."""
    shortest_depth = nx.shortest_path_length(self.graph, self.root_node, class_iri)
    return shortest_depth

get_longest_node_depth(class_iri)

Get the longest depth of the given named class in the taxonomy.

Source code in src/deeponto/onto/taxonomy.py
168
169
170
def get_longest_node_depth(self, class_iri: str):
    """Return the longest depth of the given named class in the taxonomy.

    The depth is the maximum `len` over the simple paths from the root node to
    the class, as enumerated by `nx.all_simple_paths`.
    """
    root_to_class_paths = nx.all_simple_paths(self.graph, self.root_node, class_iri)
    return max([len(path) for path in root_to_class_paths])

get_lowest_common_ancestor(class_iri1, class_iri2)

Get the lowest common ancestor of the given two named classes.

Source code in src/deeponto/onto/taxonomy.py
172
173
174
def get_lowest_common_ancestor(self, class_iri1: str, class_iri2: str):
    """Return the lowest common ancestor of two named classes via the parent implementation."""
    common_ancestor = super().get_lowest_common_ancestor(class_iri1, class_iri2)
    return common_ancestor

WordnetTaxonomy(pos='n', relation='subsumption')

Bases: Taxonomy

Class for building the taxonomy (subsumption, membership, or part-of relations) from wordnet.

Attributes:

Name Type Description
pos str

The pos-tag of entities to be extracted from wordnet.

nodes list

A list of entity ids extracted from wordnet.

edges list

A list of (parent, child) pairs w.r.t. the given hierarchical relation.

graph networkx.DiGraph

A directed hypernym graph.

Parameters:

Name Type Description Default
pos str

The pos-tag of entities to be extracted from wordnet.

'n'
relation str

The hierarchical relation for this taxonomy. Options are "subsumption", "membership", and "part". Defaults to "subsumption".

'subsumption'
Source code in src/deeponto/onto/taxonomy.py
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
def __init__(self, pos: str = "n", relation: str = "subsumption"):
    r"""Initialise the wordnet taxonomy.

    Args:
        pos (str): The pos-tag of entities to be extracted from wordnet.
        relation (str): The hierarchical relation for this taxonomy. Options are `"subsumption"`, `"membership"`, and `"part"`. Defaults to `"subsumption"`.

    Raises:
        ValueError: If `relation` is not one of the supported options.
    """
    # Validate against an explicit list before doing any work: resolving the fetcher
    # purely by name would also accept unintended values such as "synset"
    # (which resolves to `fetch_synsets`).
    if relation not in ("subsumption", "membership", "part"):
        raise ValueError(f"Input relation '{relation}' is not 'subsumption', 'membership', or 'part'.")

    self.pos = pos
    self.synsets = self.fetch_synsets(pos=pos)
    self.relation = relation
    # errors raised while fetching propagate unchanged instead of being
    # misreported as an invalid relation
    parent_child_pairs = getattr(self, f"fetch_{relation}s")(self.synsets)
    super().__init__(edges=parent_child_pairs)

    # set node annotations
    for synset in self.synsets:
        synset_id = synset.name()
        if synset_id not in self.graph.nodes:
            # synsets that appear in no fetched pair are not nodes of the graph
            continue
        self.graph.nodes[synset_id]["name"] = synset_id.split(".")[0].replace("_", " ")
        self.graph.nodes[synset_id]["definition"] = synset.definition()

fetch_synsets(pos='n') staticmethod

Get synsets of certain pos-tag from wordnet.

Source code in src/deeponto/onto/taxonomy.py
212
213
214
215
216
217
218
219
220
@staticmethod
def fetch_synsets(pos: str = "n"):
    """Collect the synsets of a certain pos-tag over all words of wordnet, as a set."""
    synsets = {synset for word in wn.words() for synset in wn.synsets(word, pos=pos)}
    logger.info(f'{len(synsets)} synsets (pos="{pos}") fetched.')
    return synsets

fetch_subsumptions(synsets) staticmethod

Get subsumption (hypernym-hyponym) pairs from a given set of wordnet synsets.

Source code in src/deeponto/onto/taxonomy.py
222
223
224
225
226
227
228
229
230
@staticmethod
def fetch_subsumptions(synsets: set):
    """Build the list of (hypernym name, synset name) pairs for the given wordnet synsets."""
    subsumption_pairs = [
        (hypernym.name(), synset.name()) for synset in synsets for hypernym in synset.hypernyms()
    ]
    logger.info(f"{len(subsumption_pairs)} subsumption (hypernym-hyponym) pairs fetched.")
    return subsumption_pairs

fetch_memberships(synsets) staticmethod

Get membership (instance hypernym-hyponym) pairs from a given set of wordnet synsets.

Source code in src/deeponto/onto/taxonomy.py
232
233
234
235
236
237
238
239
240
@staticmethod
def fetch_memberships(synsets: set):
    """Build the list of (instance hypernym name, synset name) pairs for the given wordnet synsets."""
    membership_pairs = [
        (hypernym.name(), synset.name()) for synset in synsets for hypernym in synset.instance_hypernyms()
    ]
    logger.info(f"{len(membership_pairs)} membership (instance hypernym-hyponym) pairs fetched.")
    return membership_pairs

fetch_parts(synsets) staticmethod

Get has-part (holonym-meronym) pairs from a given set of wordnet synsets.

Source code in src/deeponto/onto/taxonomy.py
242
243
244
245
246
247
248
249
250
@staticmethod
def fetch_parts(synsets: set):
    """Get has-part (holonym-meronym) pairs from a given set of wordnet synsets.

    Args:
        synsets (set): The wordnet synsets to inspect.

    Returns:
        (list): The `(part holonym name, synset name)` pairs.
    """
    part_pairs = []
    for synset in synsets:
        for h_synset in synset.part_holonyms():
            part_pairs.append((h_synset.name(), synset.name()))
    # the closing parenthesis was missing from this message
    logger.info(f"{len(part_pairs)} has-part (holonym-meronym) pairs fetched.")
    return part_pairs

TaxonomyNegativeSampler(taxonomy, entity_weights=None)

Class for efficient, buffered negative sampling over the taxonomy.

Attributes:

Name Type Description
taxonomy Taxonomy

The taxonomy for negative sampling.

entity_weights Optional[dict]

A dictionary with the taxonomy entities as keys and their corresponding weights as values. Defaults to None.

Source code in src/deeponto/onto/taxonomy.py
261
262
263
264
265
266
267
268
269
270
271
272
def __init__(self, taxonomy: Taxonomy, entity_weights: Optional[dict] = None):
    """Set up the sampler over the nodes of a taxonomy.

    Args:
        taxonomy (Taxonomy): The taxonomy for negative sampling.
        entity_weights (Optional[dict]): A dictionary with the taxonomy entities as keys and
            their corresponding weights as values. Defaults to `None`.
    """
    self.taxonomy = taxonomy
    self.entities = self.taxonomy.nodes
    self.entity_weights = entity_weights

    # uniform distribution if weights not provided (probabilities stay None);
    # otherwise the weights are normalised so that they sum to one
    probs = None
    if self.entity_weights:
        raw_weights = np.array([self.entity_weights[e] for e in self.entities])
        probs = raw_weights / raw_weights.sum()
    self._entity_probs = probs
    self._buffer = []
    self._default_buffer_size = 10000

fill(buffer_size=None)

Buffer a large collection of entities sampled with replacement for faster negative sampling.

Source code in src/deeponto/onto/taxonomy.py
274
275
276
277
278
279
280
def fill(self, buffer_size: Optional[int] = None):
    """Buffer a large collection of entities sampled with replacement for faster negative sampling.

    Args:
        buffer_size (Optional[int]): The number of entities to draw. Falls back to
            `self._default_buffer_size` when it is `None` or `0`.
    """
    buffer_size = buffer_size if buffer_size else self._default_buffer_size
    # `_entity_probs` is either None or a numpy array; taking the truth value of an
    # array with more than one element raises a ValueError, so compare against None
    if self._entity_probs is not None:
        self._buffer = np.random.choice(self.entities, size=buffer_size, p=self._entity_probs)
    else:
        self._buffer = np.random.choice(self.entities, size=buffer_size)

sample(entity_id, n_samples, buffer_size=None)

Sample N negative samples for a given entity with replacement.

Source code in src/deeponto/onto/taxonomy.py
282
283
284
285
286
287
288
289
290
291
def sample(self, entity_id: str, n_samples: int, buffer_size: Optional[int] = None):
    """Draw `n_samples` negative samples (with replacement) for the given entity.

    Candidates are consumed from the internal buffer in chunks of `n_samples`; the
    buffer is refilled via `fill(buffer_size)` whenever it holds fewer than
    `n_samples` entries. Transitive parents of the entity are never returned.
    """
    excluded = self.taxonomy.get_parents(entity_id, True)
    negatives = []
    while len(negatives) < n_samples:
        if len(self._buffer) < n_samples:
            self.fill(buffer_size)
        chunk = self._buffer[:n_samples]
        negatives.extend(candidate for candidate in chunk if candidate not in excluded)
        # drop the consumed candidates from the buffer
        self._buffer = self._buffer[n_samples:]
    return negatives[:n_samples]

Last update: September 29, 2023
Created: September 29, 2023
GitHub: @Lawhy   Personal Page: yuanhe.wiki