Source code for ontocheck.check_label

from .helpers.helpers import _find_all_named_classes, _export_missing_labels_template,  _print_classes_without_labels, _print_classes_with_labels,  _print_label_summary_statistics, _analyze_label_coverage
from rdflib import Graph

[docs] def mainLabelCheck_v_0_0_1(ttl_file, show="all", export_template=None): """ RDFS Label Coverage Analysis Analyze an OWL ontology in Turtle (ttl) format to assess the coverage and quality of RDFS labels (rdfs:label) across all named classes This main function loads an ontology file, identifies all named classes, and provides comprehensive analysis of label coverage with various display options and export capabilities Definitions ----------- - Named classes: Classes with URIRef identifiers that are explicitly declared as owl:Class or rdfs:Class, or participate in rdfs:subClassOf relations - Valid labels: rdfs:label values that are non-empty strings after whitespace trimming. Empty strings and whitespace-only labels are not counted as valid labels - Coverage percentage: The proportion of named classes that have at least one valid rdfs:label Author: Rishabh Kundu Version: 0.0.1 Parameters ---------- ttl_file : str Path to the ontology Turtle (.ttl) file to analyze -- input file show : str, optional Display option controlling what information to show: - "all" (default): Shows summary statistics, classes with labels, and classes without labels - "with": Shows only classes that have rdfs:label - "without": Shows only classes that lack rdfs:label - "summary": Shows only summary statistics export_template : str, optional Export a CSV template file for classes missing rdfs:label. Provide the desired output filename (e.g. "missing_labels_in_classes.csv"). Default is None (no export) Returns ------- None This function does not directly return values. It prints analysis results to your terminal/CLI and optionally exports a CSV template file. The function may exit early on errors (file not found, parsing errors, or no classes found) Output Information ------------------ When executed successfully, the analysis provides: - Total number of named classes analyzed - Number of classes with valid rdfs:label properties - Number of classes lacking valid rdfs:label properties - Coverage percentage of classes with rdfs:label - Prefixed class name and full URI/IRI for each class Error Handling -------------- - FileNotFoundError: When the specified TTL file cannot be found - Parsing errors: When the TTL file cannot be parsed as valid Turtle - Empty ontology: When no named classes are found in the ontology Notes ----- - Only named classes (URIRef instances) are considered in the analysis - Empty strings and whitespace-only labels are treated as missing labels - Classes are displayed with both their prefixed name and full URI/IRI - show and export_template parameters are set to default values ("all" and None) - thus, CSV export request must be explicitly mentioned .. note:: Claude AI (Sonnet 4.6) was employed chiefly to support documentation efforts. Examples -------- Basic usage (show all): mainLabelCheck_v_0_0_1("ontology.ttl") Show only summary: mainLabelCheck_v_0_0_1("ontology.ttl", show="summary") Show only classes missing labels: mainLabelCheck_v_0_0_1("ontology.ttl", show="without") Export CSV template for missing labels: mainLabelCheck_v_0_0_1("ontology.ttl", export_template="missing_labels.csv") Export template while showing summary only: mainLabelCheck_v_0_0_1("ontology.ttl", show="summary", export_template="missing_labels.csv") - in the afore a desired export path can also be inserted """ # Validate "show" parameter of main function valid_show_options = ["all", "with", "without", "summary"] if show not in valid_show_options: print(f"Error: Invalid 'show' parameter. Must be one of {valid_show_options}") return g = Graph() try: print(f"Parsing file: {ttl_file}...") # Bind common prefixes for cleaner output (future users can add more here) g.bind("mds", "https://cwrusdle.bitbucket.io/mds/") g.bind("cco", "https://www.commoncoreontologies.org/") g.bind("obo", "http://purl.obolibrary.org/obo/") g.bind("owl", "http://www.w3.org/2002/07/owl#") g.bind("rdfs", "http://www.w3.org/2000/01/rdf-schema#") g.bind("skos", "http://www.w3.org/2004/02/skos/core#") g.parse(ttl_file, format="turtle") except FileNotFoundError: print(f"Error: The file '{ttl_file}' was not found.") return except Exception as e: print(f"Error: An error occurred while parsing the TTL file: {e}") return # Find all named classes all_classes = _find_all_named_classes(g) if not all_classes: print("No named classes found in the ontology.") return # Analyze rdfs:label coverage classes_with_label, classes_without_label = _analyze_label_coverage(g, all_classes) # Display results as desired by user if show in ["summary", "all"]: _print_label_summary_statistics(g, classes_with_label, classes_without_label, all_classes) if show in ["with", "all"]: _print_classes_with_labels(g, classes_with_label) if show in ["without", "all"]: _print_classes_without_labels(g, classes_without_label) # Export template if requested explicitly by user if export_template and classes_without_label: _export_missing_labels_template(g, classes_without_label, export_template) elif export_template and not classes_without_label: print("All classes have rdfs:label — no template needed!")