Skip to content

OntoLAMA

run_inference(config, args)

Main entry for running the OpenPrompt script.

Source code in src/deeponto/complete/ontolama/inference.py
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
def run_inference(config, args):
    """Main entry for running the OpenPrompt script.

    Resolves the experiment directory, loads the OntoLAMA splits and passes them to
    `trainer` according to `config.learning_setting` (`"full"`, `"few_shot"` or
    `"zero_shot"`). Any other setting skips the `trainer` call entirely.

    Args:
        config: The global config; `logging`, `learning_setting`, `few_shot` and
            `sampling_from_train` are read here.
        args: The run arguments; only `resume` and `test` are read here and they
            must not both be set.

    Returns:
        The config together with the module-level `CUR_TEMPLATE` and `CUR_VERBALIZER`.
        The value returned by `trainer` is collected locally but not returned.

    Raises:
        Exception: If both `args.resume` and `args.test` are set.
        ValueError: If the few-shot setting is selected while
            `config.few_shot.few_shot_sampling` is `None`.
    """
    global CUR_TEMPLATE, CUR_VERBALIZER

    if args.resume and args.test:
        raise Exception("cannot use flag --resume and --test together")

    if args.resume or args.test:
        # an existing experiment directory was given: reuse it as the logging path
        EXP_PATH = args.resume or args.test
        config.logging.path = EXP_PATH
    else:
        # fresh run: create the directory, start logging and snapshot the config
        EXP_PATH = config_experiment_dir(config)
        log_file = os.path.join(EXP_PATH, "log.txt")
        init_logger(log_file, config.logging.file_level, config.logging.console_level)
        save_config_to_yaml(config)

    # train/valid splits are skipped when only testing or running zero-shot
    test_only = args.test is not None or config.learning_setting == "zero_shot"
    train_dataset, valid_dataset, test_dataset, Processor = OntoLAMADataProcessor.load_inference_dataset(
        config, test=test_only
    )

    setting = config.learning_setting
    if setting == "full":
        res = trainer(
            EXP_PATH,
            config,
            Processor,
            resume=args.resume,
            test=args.test,
            train_dataset=train_dataset,
            valid_dataset=valid_dataset,
            test_dataset=test_dataset,
        )
    elif setting == "few_shot":
        if config.few_shot.few_shot_sampling is None:
            raise ValueError("use few_shot setting but config.few_shot.few_shot_sampling is not specified")
        seeds = config.sampling_from_train.seed
        res = 0
        for seed in seeds:
            seed_dir = os.path.join(EXP_PATH, f"seed-{seed}")
            if args.test:
                # test-only run: no sampling, just evaluate on the test split
                result = trainer(
                    seed_dir,
                    config,
                    Processor,
                    test=args.test,
                    test_dataset=test_dataset,
                )
            else:
                sampler = FewShotSampler(
                    num_examples_per_label=config.sampling_from_train.num_examples_per_label,
                    also_sample_dev=config.sampling_from_train.also_sample_dev,
                    num_examples_per_label_dev=config.sampling_from_train.num_examples_per_label_dev,
                )
                train_sampled_dataset, valid_sampled_dataset = sampler(
                    train_dataset=train_dataset, valid_dataset=valid_dataset, seed=seed
                )
                result = trainer(
                    seed_dir,
                    config,
                    Processor,
                    resume=args.resume,
                    test=args.test,
                    train_dataset=train_sampled_dataset,
                    valid_dataset=valid_sampled_dataset,
                    test_dataset=test_dataset,
                )
            res += result
        # average over the seeds
        res /= len(seeds)
    elif setting == "zero_shot":
        res = trainer(
            EXP_PATH,
            config,
            Processor,
            zero=True,
            train_dataset=train_dataset,
            valid_dataset=valid_dataset,
            test_dataset=test_dataset,
        )

    return config, CUR_TEMPLATE, CUR_VERBALIZER

SubsumptionSamplerBase(onto)

Base Class for Sampling Subsumption Pairs.

Source code in src/deeponto/complete/ontolama/subsumption_sampler.py
30
31
32
33
34
35
36
37
38
39
40
41
def __init__(self, onto: Ontology):
    """Keep the ontology and precompute the lookup tables used for sampling.

    Args:
        onto (Ontology): The ontology to sample subsumption pairs from.
    """
    self.onto = onto
    self.progress_manager = enlighten.get_manager()

    # materialise the IRI lists once so that random draws are cheap
    self.concept_iris = [*self.onto.owl_classes.keys()]
    self.object_property_iris = [*self.onto.owl_object_properties.keys()]

    # index: concept -> positions of the sibling groups that contain it
    self.sibling_concept_groups = self.onto.sibling_class_groups
    group_index = defaultdict(list)
    for position, members in enumerate(self.sibling_concept_groups):
        for member in members:
            group_index[member].append(position)
    self.sibling_auxiliary_dict = group_index

random_named_concept()

Randomly draw a named concept's IRI.

Source code in src/deeponto/complete/ontolama/subsumption_sampler.py
43
44
45
def random_named_concept(self) -> str:
    """Return the IRI of one named concept chosen at random from `self.concept_iris`."""
    candidate_iris = self.concept_iris
    return random.choice(candidate_iris)

random_object_property()

Randomly draw an object property's IRI.

Source code in src/deeponto/complete/ontolama/subsumption_sampler.py
47
48
49
def random_object_property(self) -> str:
    """Return the IRI of one object property chosen at random from `self.object_property_iris`."""
    candidate_iris = self.object_property_iris
    return random.choice(candidate_iris)

get_siblings(concept_iri)

Get the sibling concepts of the given concept.

Source code in src/deeponto/complete/ontolama/subsumption_sampler.py
51
52
53
54
55
56
def get_siblings(self, concept_iri: str) -> list:
    """Get the sibling concepts of the given concept.

    Concatenates every sibling group that `self.sibling_auxiliary_dict` lists for the
    concept. Groups are returned as stored: the result is not de-duplicated, and
    since the index points at groups containing the concept, `concept_iri` itself
    is expected to appear in a non-empty result.

    Args:
        concept_iri (str): The IRI of the concept to look up.

    Returns:
        The members of all matching sibling groups, or an empty list if the concept
        is not in the index.
    """
    # `.get` keeps the lookup read-only; indexing the defaultdict would insert an
    # empty entry for every IRI that has no sibling group
    group_positions = self.sibling_auxiliary_dict.get(concept_iri, [])
    matching_groups = (self.sibling_concept_groups[i] for i in group_positions)
    return list(itertools.chain.from_iterable(matching_groups))

random_sibling(concept_iri)

Randomly draw a sibling concept for a given concept.

Source code in src/deeponto/complete/ontolama/subsumption_sampler.py
58
59
60
61
62
63
64
65
def random_sibling(self, concept_iri: str) -> Optional[str]:
    """Randomly draw a sibling concept for a given concept.

    Args:
        concept_iri (str): The IRI of the concept whose siblings are drawn from.

    Returns:
        One entry of `self.get_siblings(concept_iri)` chosen at random, or `None`
        when that list is empty.
    """
    candidates = self.get_siblings(concept_iri)
    # not every concept has a sibling concept
    return random.choice(candidates) if candidates else None

AtomicSubsumptionSampler(onto)

Bases: SubsumptionSamplerBase

Sampler for constructing the Atomic Subsumption Inference (SI) dataset.

Positive samples come from the entailed subsumptions.

Soft negative samples come from the pairs of randomly selected concepts, subject to passing the assumed disjointness check.

Hard negative samples come from the pairs of randomly selected sibling concepts, subject to passing the assumed disjointness check.

Source code in src/deeponto/complete/ontolama/subsumption_sampler.py
88
89
90
91
92
93
94
95
def __init__(self, onto: Ontology):
    """Initialise the base sampler and enumerate all ordered sibling pairs.

    Args:
        onto (Ontology): The ontology to sample subsumption pairs from.
    """
    super().__init__(onto)

    # every ordered pair of distinct members of a sibling group is a candidate
    # hard negative; enumerating them up front makes each draw a single choice
    self.sibling_pairs = [
        (left, right)
        for group in self.sibling_concept_groups
        for left, right in itertools.product(group, group)
        if left != right
    ]
    self.maximum_num_hard_negatives = len(self.sibling_pairs)

positive_sampling(num_samples=None)

Sample named concept pairs that are involved in a subsumption axiom.

An extracted pair \((C, D)\) indicates \(\mathcal{O} \models C \sqsubseteq D\) where \(\mathcal{O}\) is the input ontology.

Source code in src/deeponto/complete/ontolama/subsumption_sampler.py
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
def positive_sampling(self, num_samples: Optional[int] = None):
    r"""Sample named concept pairs that are involved in a subsumption axiom.

    An extracted pair $(C, D)$ indicates $\mathcal{O} \models C \sqsubseteq D$ where
    $\mathcal{O}$ is the input ontology.

    Args:
        num_samples (Optional[int]): If given (and non-zero), the number of pairs
            drawn at random, without replacement, from the unique pairs. Otherwise
            all unique pairs are returned.

    Returns:
        A list of `(sub_concept_iri, super_concept_iri)` tuples. Without
        `num_samples` the list is sorted.

    Raises:
        ValueError: If `num_samples` exceeds the number of unique pairs
            (raised by `random.sample`).
    """
    pbar = self.progress_manager.counter(desc="Sample Positive Subsumptions", unit="pair")
    unique_pairs = set()
    for concept_iri in self.concept_iris:
        owl_concept = self.onto.owl_classes[concept_iri]
        for subsumer_iri in self.onto.reasoner.get_inferred_super_entities(owl_concept, direct=False):
            unique_pairs.add((concept_iri, subsumer_iri))
            pbar.update()
    # sort after de-duplicating: converting a sorted list to a set discards the
    # order again, which made the output (and any seeded sample) irreproducible
    positives = sorted(unique_pairs)
    if num_samples:
        positives = random.sample(positives, num_samples)
    print(f"Sample {len(positives)} unique positive subsumption pairs.")
    return positives

negative_sampling(negative_sample_type, num_samples, apply_assumed_disjointness_alternative=True)

Sample named concept pairs that are involved in a disjointness (assumed) axiom, which then implies non-subsumption.

Source code in src/deeponto/complete/ontolama/subsumption_sampler.py
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
def negative_sampling(
    self,
    negative_sample_type: str,
    num_samples: int,
    apply_assumed_disjointness_alternative: bool = True,
):
    r"""Sample named concept pairs that are involved in a disjointness (assumed) axiom, which then
    implies non-subsumption.

    Candidate pairs are drawn at random and kept only if they pass the assumed
    disjointness check. Sampling stops once `num_samples` unique pairs are collected
    or after `2 * num_samples` draws, whichever comes first, so fewer pairs than
    requested may be returned.

    Args:
        negative_sample_type (str): `"soft"` draws two distinct entries of
            `self.concept_iris`; `"hard"` draws one entry of `self.sibling_pairs`.
        num_samples (int): The target number of unique negative pairs.
        apply_assumed_disjointness_alternative (bool): Validate candidates with the
            reasoner's `check_assumed_disjoint_alternative` instead of
            `check_assumed_disjoint`.

    Returns:
        A sorted list of unique `(sub_concept_iri, super_concept_iri)` tuples.

    Raises:
        RuntimeError: If `negative_sample_type` is neither `"soft"` nor `"hard"`.
    """
    if negative_sample_type == "soft":

        def draw_one():
            return tuple(random.sample(self.concept_iris, k=2))

    elif negative_sample_type == "hard":

        def draw_one():
            return random.choice(self.sibling_pairs)

    else:
        raise RuntimeError(f"{negative_sample_type} not supported.")

    max_iter = 2 * num_samples

    # which method to validate the negative sample
    if apply_assumed_disjointness_alternative:
        valid_negative = self.onto.reasoner.check_assumed_disjoint_alternative
    else:
        valid_negative = self.onto.reasoner.check_assumed_disjoint

    print(f"Sample {negative_sample_type} negative subsumption pairs.")
    # create two bars for process tracking
    added_bar = self.progress_manager.counter(total=num_samples, desc="Sample Negative Subsumptions", unit="pair")
    iter_bar = self.progress_manager.counter(total=max_iter, desc="#Iteration", unit="it")

    # a set de-duplicates as pairs arrive, so its size is always the number of
    # unique negatives and no periodic re-counting is needed
    unique_negatives = set()
    for _ in range(max_iter):
        if len(unique_negatives) >= num_samples:
            break
        sub_concept_iri, super_concept_iri = draw_one()
        sub_concept = self.onto.get_owl_object(sub_concept_iri)
        super_concept = self.onto.get_owl_object(super_concept_iri)
        # collect class iri if accepted
        if valid_negative(sub_concept, super_concept):
            candidate = (sub_concept_iri, super_concept_iri)
            if candidate not in unique_negatives:
                unique_negatives.add(candidate)
                added_bar.update(1)
        iter_bar.update(1)

    # sort after de-duplicating so that the returned order is reproducible
    negatives = sorted(unique_negatives)
    print(f"Sample {len(negatives)} unique negative subsumption pairs.")
    return negatives

ComplexSubsumptionSampler(onto)

Bases: SubsumptionSamplerBase

Sampler for constructing the Complex Subsumption Inference (SI) dataset.

To obtain complex concept expressions on both sides of the subsumption relationship (as a sub-concept or a super-concept), this sampler utilises the equivalence axioms in the form of \(C \equiv C_{comp}\) where \(C\) is atomic and \(C_{comp}\) is complex.

An equivalence axiom like \(C \equiv C_{comp}\) is deemed as an anchor axiom.

Positive samples are in the form of \(C_{sub} \sqsubseteq C_{comp}\) or \(C_{comp} \sqsubseteq C_{super}\) where \(C_{sub}\) is an entailed sub-concept of \(C\) and \(C_{comp}\), \(C_{super}\) is an entailed super-concept of \(C\) and \(C_{comp}\).

Negative samples are formed by replacing one of the named entities in the anchor axiom; the modified sub-concept and super-concept need to pass the assumed disjointness check to be accepted as a valid negative sample. Without loss of generality, suppose we choose \(C \sqsubseteq C_{comp}\) and replace a named entity in \(C_{comp}\) to form \(C \sqsubseteq C_{comp}'\); then \(C\) and \(C_{comp}'\) form a valid negative only if they satisfy the assumed disjointness check.

Source code in src/deeponto/complete/ontolama/subsumption_sampler.py
189
190
191
def __init__(self, onto: Ontology):
    """Initialise the base sampler and collect the anchor axioms.

    Args:
        onto (Ontology): The ontology to sample subsumption axioms from.
    """
    super().__init__(onto)
    # the class equivalence axioms of the ontology serve as the anchors
    equivalence_axioms = self.onto.get_equivalence_axioms("Classes")
    self.anchor_axioms = equivalence_axioms

positive_sampling_from_anchor(anchor_axiom)

Returns all positive subsumption pairs extracted from an anchor equivalence axiom.

Source code in src/deeponto/complete/ontolama/subsumption_sampler.py
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
def positive_sampling_from_anchor(self, anchor_axiom: OWLAxiom):
    """Returns all positive subsumption pairs extracted from an anchor equivalence axiom.

    The result consists of the sub-class axioms the anchor itself splits into, the
    complex concept subsumed by every inferred super-concept of the atomic concept,
    and every inferred sub-concept of the atomic concept subsumed by the complex concept.

    Args:
        anchor_axiom (OWLAxiom): The anchor equivalence axiom.

    Returns:
        A sorted list of unique sub-class axioms.
    """
    # split the equivalence once and reuse the result below
    anchor_sub_axioms = list(anchor_axiom.asOWLSubClassOfAxioms())
    first_sub_axiom = anchor_sub_axioms[0]
    atomic_concept, complex_concept = first_sub_axiom.getSubClass(), first_sub_axiom.getSuperClass()
    # determine which is the atomic concept
    if complex_concept.isClassExpressionLiteral():
        atomic_concept, complex_concept = complex_concept, atomic_concept

    reasoner = self.onto.reasoner
    make_subsumption = self.onto.owl_data_factory.getOWLSubClassOfAxiom

    # initialise the positive samples from the anchor equivalence axiom
    positives = set(anchor_sub_axioms)
    for super_concept_iri in reasoner.get_inferred_super_entities(atomic_concept, direct=False):
        positives.add(make_subsumption(complex_concept, self.onto.get_owl_object(super_concept_iri)))
    for sub_concept_iri in reasoner.get_inferred_sub_entities(atomic_concept, direct=False):
        positives.add(make_subsumption(self.onto.get_owl_object(sub_concept_iri), complex_concept))

    # NOTE: when debugging, each axiom can be verified with
    # `self.onto.reasoner.owl_reasoner.isEntailed(axiom)`

    # sort after de-duplicating; sorting before building a set has no lasting effect
    return sorted(positives)

positive_sampling(num_samples_per_anchor=10)

Sample positive subsumption axioms that involve one atomic and one complex concept.

An extracted pair \((C, D)\) indicates \(\mathcal{O} \models C \sqsubseteq D\) where \(\mathcal{O}\) is the input ontology.

Source code in src/deeponto/complete/ontolama/subsumption_sampler.py
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
def positive_sampling(self, num_samples_per_anchor: Optional[int] = 10):
    r"""Sample positive subsumption axioms that involve one atomic and one complex concept.

    An extracted pair $(C, D)$ indicates $\mathcal{O} \models C \sqsubseteq D$ where
    $\mathcal{O}$ is the input ontology.

    Args:
        num_samples_per_anchor (Optional[int]): Upper bound on the number of
            positives kept per anchor axiom; a falsy value keeps all of them.

    Returns:
        A dict mapping `str(anchor)` to the positives kept for that anchor.
    """
    print(f"Maximum number of positive samples for each anchor is set to {num_samples_per_anchor}.")
    pbar = self.progress_manager.counter(desc="Sample Positive Subsumptions from", unit="anchor axiom")
    positives = {}
    for anchor in self.anchor_axioms:
        candidates = self.positive_sampling_from_anchor(anchor)
        # down-sample only when a limit is set and the anchor exceeds it
        if num_samples_per_anchor and len(candidates) > num_samples_per_anchor:
            candidates = random.sample(candidates, k=num_samples_per_anchor)
        positives[str(anchor)] = candidates
        pbar.update()
    total = sum(len(kept) for kept in positives.values())
    print(f"Sample {total} unique positive subsumption pairs.")
    return positives

negative_sampling(num_samples_per_anchor=10)

Sample negative subsumption axioms that involve one atomic and one complex concept.

An extracted pair \((C, D)\) indicates \(C\) and \(D\) pass the assumed disjointness check.

Source code in src/deeponto/complete/ontolama/subsumption_sampler.py
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
def negative_sampling(self, num_samples_per_anchor: Optional[int] = 10):
    r"""Sample negative subsumption axioms that involve one atomic and one complex concept.

    An extracted pair $(C, D)$ indicates $C$ and $D$ pass the [assumed disjointness check][deeponto.onto.OntologyReasoner.check_assumed_disjoint].

    For every anchor axiom, at most `num_samples_per_anchor + 2` random corruptions
    are tried; accepted candidates are de-duplicated afterwards, so an anchor may end
    up with fewer negatives than requested.

    Args:
        num_samples_per_anchor (Optional[int]): The target number of negatives per
            anchor axiom. Despite the annotation, `None` is not supported when
            there is at least one anchor: it is used in integer arithmetic.

    Returns:
        A dict mapping `str(anchor)` to a sorted list of unique negative sub-class axioms.
    """
    print(f"Maximum number of negative samples for each anchor is set to {num_samples_per_anchor}.")
    pbar = self.progress_manager.counter(desc="Sample Negative Subsumptions from", unit="anchor axiom")
    negatives = {}
    for anchor in self.anchor_axioms:
        accepted = []
        # allow two attempts more than the number of samples asked for
        max_iter = num_samples_per_anchor + 2
        for _ in range(max_iter):
            if len(accepted) >= num_samples_per_anchor:
                break
            corrupted_anchor = self.random_corrupt(anchor)
            corrupted_sub_axiom = random.choice(list(corrupted_anchor.asOWLSubClassOfAxioms()))
            sub_concept, super_concept = corrupted_sub_axiom.getSubClass(), corrupted_sub_axiom.getSuperClass()
            if self.onto.reasoner.check_assumed_disjoint_alternative(sub_concept, super_concept):
                accepted.append(corrupted_sub_axiom)
        # sort after de-duplicating so that the stored order is reproducible
        negatives[str(anchor)] = sorted(set(accepted))
        pbar.update()
    total = sum(len(kept) for kept in negatives.values())
    print(f"Sample {total} unique negative subsumption pairs.")
    return negatives

random_corrupt(axiom)

Randomly change an IRI in the input axiom and return a new one.

Source code in src/deeponto/complete/ontolama/subsumption_sampler.py
265
266
267
268
269
270
271
272
273
274
275
276
277
278
def random_corrupt(self, axiom: OWLAxiom):
    """Randomly change an IRI in the input axiom and return a new one.

    One IRI occurrence in the axiom's string form is chosen at random and replaced
    by a random entity of the same kind (named concept or object property).

    Args:
        axiom (OWLAxiom): The axiom to corrupt.

    Returns:
        The result of `self.onto.replace_entity` for the chosen replacement.

    Raises:
        RuntimeError: If the chosen entity is neither a class nor an object property.
    """
    iri_matches = re.findall(IRI, str(axiom))
    # drop the first and last character of the match (presumably the enclosing
    # angle brackets; confirm against the `IRI` pattern)
    replaced_iri = random.choice(iri_matches)[1:-1]
    replaced_entity = self.onto.get_owl_object(replaced_iri)
    entity_type = self.onto.get_entity_type(replaced_entity)

    if entity_type == "Classes":
        replacement_iri = self.random_named_concept()
    elif entity_type == "ObjectProperties":
        replacement_iri = self.random_object_property()
    else:
        # NOTE: to extend to other types of entities in future
        raise RuntimeError("Unknown type of axiom.")

    return self.onto.replace_entity(axiom, replaced_iri, replacement_iri)

OntoLAMADataProcessor()

Bases: DataProcessor

Class for processing the OntoLAMA data points.

Source code in src/deeponto/complete/ontolama/data_processor.py
25
26
27
def __init__(self):
    """Initialise the base processor and set the two class labels."""
    super().__init__()
    # list position doubles as label id: 0 -> "negative", 1 -> "positive"
    label_names = ["negative", "positive"]
    self.labels = label_names

load_dataset(task_name, split) staticmethod

Load a specific OntoLAMA dataset from huggingface dataset hub.

Source code in src/deeponto/complete/ontolama/data_processor.py
29
30
31
32
33
@staticmethod
def load_dataset(task_name: str, split: str):
    """Load one split of a specific OntoLAMA dataset from the Hugging Face dataset hub.

    Args:
        task_name (str): The dataset configuration (task) to load.
        split (str): The split to load, e.g. `"train"`, `"validation"` or `"test"`.

    Returns:
        Whatever the underlying `load_dataset` call returns for that split.
    """
    hub_repository = "krr-oxford/OntoLAMA"
    # TODO: remove use_auth_token after going to public
    return load_dataset(hub_repository, task_name, split=split, use_auth_token=True)

get_examples(task_name, split)

Load a specific OntoLAMA dataset and transform the data points into input examples for prompt-based inference.

Source code in src/deeponto/complete/ontolama/data_processor.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
def get_examples(self, task_name, split):
    """Load a specific OntoLAMA dataset and transform the data points into
    input examples for prompt-based inference.

    Args:
        task_name: The dataset configuration (task) to load.
        split: The split to load.

    Returns:
        A list with one `InputExample` per data point: premise as `text_a`,
        hypothesis as `text_b`, and the data point's `label`.
    """
    records = self.load_dataset(task_name, split)

    # the bimnli dataset names its two text fields differently
    if "bimnli" in task_name:
        premise_key, hypothesis_key = "premise", "hypothesis"
    else:
        premise_key, hypothesis_key = "v_sub_concept", "v_super_concept"

    return [
        InputExample(text_a=record[premise_key], text_b=record[hypothesis_key], label=record["label"])
        for record in records
    ]

load_inference_dataset(config, return_class=True, test=False) classmethod

A dataset loader using a global config. It will load the train, valid, and test set (if they exist) simultaneously.

Parameters:

Name Type Description Default
config CfgNode

The global config from the CfgNode.

required
return_class bool

Whether to return the data processor object for future usage.

True

Returns:

Type Description
Optional[List[InputExample]]

The train dataset.

Optional[List[InputExample]]

The valid dataset.

Optional[List[InputExample]]

The test dataset.

Optional[OntoLAMADataProcessor]

The data processor object.

Source code in src/deeponto/complete/ontolama/data_processor.py
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
@classmethod
def load_inference_dataset(cls, config: CfgNode, return_class=True, test=False):
    r"""A dataset loader using a global config.
    It will load the train, valid, and test set (if they exist) simultaneously.

    Args:
        config (CfgNode): The global config from the CfgNode.
        return_class (bool): Whether to return the data processor object for future usage.
        test (bool): If true, skip the train and validation sets and load the test set only.

    Returns:
        (Optional[List[InputExample]]): The train dataset.
        (Optional[List[InputExample]]): The valid dataset.
        (Optional[List[InputExample]]): The test dataset.
        (Optional[OntoLAMADataProcessor]): The data processor object
            (only when `return_class` is true).

    Raises:
        SystemExit: If none of the three datasets could be loaded.
    """
    task_name = config.dataset.task_name
    processor = cls()

    def load_split(split, description):
        """Return the examples of one split, or None (after a warning) if it is missing."""
        try:
            return processor.get_examples(task_name, split)
        except FileNotFoundError:
            logger.warning(f"Has no {description} dataset in krr-oxford/OntoLAMA/{task_name}.")
            return None

    # training and validation data are not needed for a test-only run
    train_dataset = None if test else load_split("train", "training")
    valid_dataset = None if test else load_split("validation", "validation")
    test_dataset = load_split("test", "test")

    # checking whether downloaded.
    if (train_dataset is None) and (valid_dataset is None) and (test_dataset is None):
        logger.error(
            "Dataset is empty. Either there is no download or the path is wrong. "
            + "If not downloaded, please `cd datasets/` and `bash download_xxx.sh`"
        )
        # `exit()` is the interactive helper injected by `site` and reports success
        # (status 0); raise SystemExit directly with a failure status instead
        raise SystemExit(1)

    if return_class:
        return train_dataset, valid_dataset, test_dataset, processor
    return train_dataset, valid_dataset, test_dataset

Last update: February 2, 2024
Created: April 14, 2023
GitHub: @Lawhy   Personal Page: yuanhe.wiki