Source code for ontocheck.check_for_isolated_elements

from rdflib import OWL, SKOS, RDF, RDFS, Graph, Namespace, DCAT, URIRef, BNode
import networkx as nx
import rdflib
from .helpers.helpers import _parse_rdf_list, _constructed_class_has_atomic_class, _get_operands

def check_for_isolated_elements(ttl_file: str):
    """
    C1 - Number of isolated elements.

    Analyze an OWL ontology in Turtle format to identify isolated atomic
    classes and isolated properties.

    Definitions
    -----------
    - Atomic classes are named classes (URI subjects typed ``owl:Class``)
      that have no ``owl:unionOf``, ``owl:intersectionOf`` or
      ``owl:complementOf`` and that are not linked through
      ``owl:equivalentClass`` / ``rdfs:subClassOf`` to a node typed
      ``owl:Restriction``.
    - An atomic class is considered connected if it:
        * participates in ``rdfs:subClassOf``, ``owl:equivalentClass`` or
          ``owl:disjointWith`` with another atomic class, or with a class for
          which ``_constructed_class_has_atomic_class`` reports atomic
          content, OR
        * is used as the ``rdfs:domain`` or ``rdfs:range`` of a property and
          ``_constructed_class_has_atomic_class`` accepts it.
    - A property is considered connected if it appears (as a URI) as subject
      or object of ``rdfs:subPropertyOf``, ``owl:inverseOf``,
      ``owl:propertyDisjointWith``, ``owl:equivalentProperty`` or
      ``skos:broader``.

    Author: Van Tran
    Version: 0.0.1

    Parameters
    ----------
    ttl_file : str
        File path to the ontology Turtle (.ttl) file.

    Returns
    -------
    dict
        Keys ``"Number of isolated classes"``,
        ``"Proportion of isolated classes"``,
        ``"Number of isolated properties"`` and
        ``"Proportion of isolated properties"``. Each value is a one-element
        set holding the corresponding number. A proportion is ``0.0`` when
        its denominator (atomic classes / properties) is empty.

    Prints
    ------
    Per-class ``[DEBUG]`` / ``[INFO]`` classification messages, followed by
    the lists, counts and proportions of isolated atomic classes and
    isolated properties.

    Notes
    -----
    - Only named (URI) classes explicitly declared as owl:Class are
      considered; blank-node class expressions are never counted as atomic.
    - Only properties explicitly declared as owl:ObjectProperty or
      owl:DatatypeProperty are considered.

    References
    ----------
    Mc Gurk, S., Abela, C., & Debattista, J. (2017). Towards ontology quality
    assessment. 4th Workshop on Linked Data Quality (LDQ2017), co-located
    with the 14th Extended Semantic Web Conference (ESWC), Portorož, 94-106.
    """
    g = Graph()
    g.parse(ttl_file, format="turtle")

    # Named classes only: a blank node typed owl:Class is an anonymous class
    # expression and must not be counted as an atomic class.
    named_classes = {
        c for c in g.subjects(RDF.type, OWL.Class) if isinstance(c, URIRef)
    }

    # Identify atomic classes: named classes without OWL class constructors.
    boolean_constructors = (OWL.unionOf, OWL.intersectionOf, OWL.complementOf)
    atomic_classes = set()
    for c in named_classes:
        # Triple-pattern containment avoids materialising the object list.
        has_boolean = any((c, p, None) in g for p in boolean_constructors)

        # A class tied to an owl:Restriction node is treated as non-atomic.
        has_restriction = False
        for p in (OWL.equivalentClass, RDFS.subClassOf):
            for obj in g.objects(c, p):
                if (obj, RDF.type, OWL.Restriction) in g:
                    has_restriction = True
                    print(f"[DEBUG] {c} has restriction via {p}{obj}")

        if has_boolean:
            print(f"[DEBUG] {c} excluded because it has a boolean expression")

        if not has_boolean and not has_restriction:
            atomic_classes.add(c)
            print(f"[INFO] Added atomic class: {c}")
        else:
            print(f"[INFO] Skipped non-atomic class: {c}")

    properties = set(g.subjects(RDF.type, OWL.ObjectProperty)) | set(
        g.subjects(RDF.type, OWL.DatatypeProperty)
    )

    connected_atomic = set()

    # Relations linking atomic classes.
    for pred in (RDFS.subClassOf, OWL.equivalentClass, OWL.disjointWith):
        for s, o in g.subject_objects(pred):
            if s in atomic_classes and o in atomic_classes:
                connected_atomic.add(s)
                connected_atomic.add(o)
                continue
            # If constructed classes are involved, check their atomic content
            # and mark the atomic class on the other side as connected.
            if _constructed_class_has_atomic_class(s, g, atomic_classes):
                if isinstance(o, URIRef) and o in atomic_classes:
                    connected_atomic.add(o)
            if _constructed_class_has_atomic_class(o, g, atomic_classes):
                if isinstance(s, URIRef) and s in atomic_classes:
                    connected_atomic.add(s)

    # Consider domain and range usage of properties. Non-atomic URIs that end
    # up in connected_atomic are harmless: only the set difference is used.
    for prop in properties:
        for pred in (RDFS.domain, RDFS.range):
            for cls in g.objects(prop, pred):
                if _constructed_class_has_atomic_class(cls, g, atomic_classes):
                    if isinstance(cls, URIRef):
                        connected_atomic.add(cls)

    isolated_atomic_classes = atomic_classes - connected_atomic

    # Properties isolation.
    connected_properties = set()
    property_relations = (
        RDFS.subPropertyOf,
        OWL.inverseOf,
        OWL.propertyDisjointWith,
        OWL.equivalentProperty,
        SKOS.broader,
    )
    for pred in property_relations:
        for s, o in g.subject_objects(pred):
            if isinstance(s, URIRef):
                connected_properties.add(s)
            if isinstance(o, URIRef):
                connected_properties.add(o)

    isolated_properties = properties - connected_properties

    # Guard the ratios: an ontology without atomic classes or without
    # declared properties would otherwise raise ZeroDivisionError.
    ratio_iso_to_total_class = (
        len(isolated_atomic_classes) / len(atomic_classes)
        if atomic_classes
        else 0.0
    )
    ratio_iso_to_total_prop = (
        len(isolated_properties) / len(properties) if properties else 0.0
    )

    print("Isolated Atomic Classes:")
    for cls in sorted(isolated_atomic_classes):
        print(f" {cls}")
    print(f"Number of isolated classes: {len(isolated_atomic_classes)}")
    print(f"\nProportion of isolated classes: {ratio_iso_to_total_class}")

    print("\nIsolated Properties:")
    for prop in sorted(isolated_properties):
        print(f" {prop}")
    print(f"Number of isolated properties: {len(isolated_properties)}")
    print(f"\nProportion of isolated properties: {ratio_iso_to_total_prop}")

    # NOTE(review): every value below is a one-element *set* literal (the
    # braces are not f-string placeholders). This looks unintended, but the
    # shape is kept so existing callers are not broken - confirm with callers
    # before switching to plain numbers.
    return {
        "Number of isolated classes": {len(isolated_atomic_classes)},
        "Proportion of isolated classes": {ratio_iso_to_total_class},
        "Number of isolated properties": {len(isolated_properties)},
        "Proportion of isolated properties": {ratio_iso_to_total_prop},
    }