Source code for ontocheck.find_duplicate_labels_from_graph

from collections import defaultdict
from rdflib import Graph, RDF, RDFS, OWL

[docs] def find_duplicate_labels_from_graph(ttl_file): """ IO8 - Semantically Identical Classes This metric identifies semantically identical classes by checking if two IRIs in an ontology has the same value for rdfs:label or not. Params ------ ttl_file (string): path to ttl file Returns ------- duplicates (dict): dictionary of URIs of duplicated terms Author: Van Tran Version: 0.0.1 References ---------- Mc Gurk, S., Abela, C., & Debattista, J. (2017). Towards ontology quality assessment. 4th Workshop on Linked Data Quality (LDQ2017), co-located with the 14th Extended Semantic Web Conference (ESWC), Portorož, 94-106. """ ontology_graph = Graph() ontology_graph.parse(ttl_file) label_to_entities = defaultdict(list) # Define entity types of interest target_types = [OWL.Class, OWL.ObjectProperty, OWL.DatatypeProperty] for s in ontology_graph.subjects(RDFS.label, None): # Check if s has one of the target rdf:type values types = list(ontology_graph.objects(s, RDF.type)) if any(t in target_types for t in types): label = str(next(ontology_graph.objects(s, RDFS.label))).strip().lower() label_to_entities[label].append(s) # Find duplicates duplicates = {label: uris for label, uris in label_to_entities.items() if len(uris) > 1} if duplicates: print("Duplicate rdfs:label values found (case-insensitive):") for label, entities in duplicates.items(): print(f" Label: '{label}' used by:") for entity in entities: print(f" - {entity}") else: print("No duplicate rdfs:label values found.") return len(duplicates)