Source code for ontocheck.class_search

import os

from rdflib import Graph, URIRef
from rdflib.namespace import RDF, RDFS, OWL

def _local_name(uri_str: str) -> str:
    """Return the text after the last '#' in *uri_str*, or after the last '/' if it has no '#'."""
    separator = "#" if "#" in uri_str else "/"
    return uri_str.rsplit(separator, 1)[-1]


def mainClassSearch_v_0_0_1(ontology_graph_or_path, search_term: str) -> list:
    """
    Class Name Substring Search

    Evaluates an ontology to find all class names that contain a specified
    substring, irrespective of capitalization. This metric assesses whether
    certain concepts can be resolved from a semantic or string basis, which is
    important for identifying overlapping concepts, naming consistency, or
    potential duplicates (e.g., 'VoltageRating' and 'VoltAgeRange').

    Author: Redad Mehdi
    Version: 0.0.1

    Parameters:
    -----------
    ontology_graph_or_path : str, os.PathLike or rdflib.Graph
        Path to the Turtle (.ttl) file to analyze, or a pre-loaded graph.
        Strings and path-like objects (e.g. ``pathlib.Path``) are parsed as
        Turtle into a new ``rdflib.Graph``; any other object is used as the
        graph as-is.
    search_term : str
        The string to search for within the class names (case-insensitive).
        An empty string is a substring of every name, so it matches every
        named class.

    Returns:
    --------
    list of dict
        A list containing dictionaries of the matched classes, sorted by URI.
        Each dictionary contains:
        - 'class_name': The extracted local name of the class (str).
        - 'uri': The full URI string of the class (str).
        Returns an empty list if no matches are found.

    Notes:
    ------
    Classes are the subjects typed as owl:Class or rdfs:Class; blank nodes
    are skipped. The local name is everything after the last '#' in the URI,
    or after the last '/' when the URI contains no '#'.

    Besides returning the matches, the function prints a human-readable
    report of them to standard output.

    Errors raised while parsing the file propagate to the caller unchanged.

    Example:
    --------
    >>> matches = mainClassSearch_v_0_0_1('dataset.ttl', 'VOLtage')  # doctest: +SKIP
    >>> print(f"Found {len(matches)} matching classes.")  # doctest: +SKIP
    """
    # Load the graph if a file path is provided, otherwise use the passed graph.
    # os.PathLike is accepted too so that pathlib.Path inputs are not mistaken
    # for an already-loaded graph.
    if isinstance(ontology_graph_or_path, (str, os.PathLike)):
        g = Graph()
        g.parse(os.fspath(ontology_graph_or_path), format="turtle")
    else:
        g = ontology_graph_or_path

    search_term_lower = search_term.lower()

    # Get all unique classes (both OWL and RDFS classes just to be safe).
    class_nodes = set(g.subjects(RDF.type, OWL.Class)).union(
        g.subjects(RDF.type, RDFS.Class)
    )

    # Only real URIs are considered, not BNodes (blank nodes). Sorting makes
    # the result independent of set iteration order, which varies between
    # runs because string hashing is randomized.
    class_uris = sorted(
        str(node) for node in class_nodes if isinstance(node, URIRef)
    )

    matches = []
    for uri_str in class_uris:
        local_name = _local_name(uri_str)
        # Case-insensitive check
        if search_term_lower in local_name.lower():
            matches.append({
                "class_name": local_name,
                "uri": uri_str,
            })

    print("\n" + "="*40)
    print(f"SEARCH RESULTS FOR: '{search_term}'")
    print("="*40)

    if matches:
        print(f"Found {len(matches)} matching class(es):\n")
        for match in matches:
            print(f" * Class: {match['class_name']}")
            print(f" URI: {match['uri']}\n")
    else:
        print(f"No classes found containing '{search_term}'.\n")

    print("="*40 + "\n")

    return matches