Source code for ontocheck.run_assessment

"""
Ontology Assessment Runner

Provides runner functions for the four OntoCheck assessment modes:

    Mode 1 -- Task-agnostic:  structural, labeling, accessibility, and
              naming-convention metrics applied to a single ontology.
    Mode 2 -- Task-specific Web ontology:  task-based Relevance/Accuracy
              validated against a knowledge graph (e.g., DBpedia via LC-QuAD).
    Mode 3 -- Task-based Scientific:  domain ontology assessed against
              competency questions encoded as SPARQL queries.
    Mode 4 -- Cross-Domain:  multiple ontologies merged and assessed against
              cross-domain competency questions.
"""

import logging
import sys
import csv
from pathlib import Path

from .altLabelCheck import mainAltLabelCheck_v_0_0_1
from .check_external_data_provider_links_ttl import check_external_data_provider_links_ttl
from .check_for_isolated_elements import check_for_isolated_elements
from .check_human_readable_license_ttl import check_human_readable_license_ttl
from .check_rdf_dump_accessibility_ttl import check_rdf_dump_accessibility_ttl
from .check_sparql_accessibility_ttl import check_sparql_accessibility_ttl
from .count_class_connected_components import count_class_connected_components
from .defCheck import mainDefCheck_v_0_0_1
from .find_duplicate_labels_from_graph import find_duplicate_labels_from_graph
from .get_properties_missing_domain_and_range import get_properties_missing_domain_and_range
from .leafNodeCheck import mainLeafNodeCheck_v_0_0_1
from .semanticConnection import mainSemanticConnection_v_0_0_1
from .mds_design_check import mds_design_check_v_0_0_1
from .spell_check import spell_check_v_0_0_1
from .check_class_name_capital import mainClassNameCapitalCheck_v_0_0_1
from .check_class_name_space import mainClassNameSpaceCheck_v_0_0_1
from .check_label import mainLabelCheck_v_0_0_1
from .class_search import mainClassSearch_v_0_0_1
from .task_based_metric import task_based_metric_v_0_0_1

# Registry of task-agnostic metrics: public metric name -> implementing
# callable.  The runner functions in this module look a metric up by name and
# call it with the ontology's Turtle file path.  Insertion order is preserved,
# and it is the order in which metrics execute when "all" is requested.
METRIC_DISPATCHER = dict(
    altLabelCheck=mainAltLabelCheck_v_0_0_1,
    externalLinks=check_external_data_provider_links_ttl,
    isolatedElements=check_for_isolated_elements,
    humanLicense=check_human_readable_license_ttl,
    rdfDump=check_rdf_dump_accessibility_ttl,
    sparqlEndpoint=check_sparql_accessibility_ttl,
    classConnections=count_class_connected_components,
    definitionCheck=mainDefCheck_v_0_0_1,
    duplicateLabels=find_duplicate_labels_from_graph,
    missingDomainRange=get_properties_missing_domain_and_range,
    leafNodeCheck=mainLeafNodeCheck_v_0_0_1,
    semanticConnection=mainSemanticConnection_v_0_0_1,
    mdsDesignCheck=mds_design_check_v_0_0_1,
    spellCheck=spell_check_v_0_0_1,
    classCapitalCheck=mainClassNameCapitalCheck_v_0_0_1,
    classSpaceCheck=mainClassNameSpaceCheck_v_0_0_1,
    checkLabel=mainLabelCheck_v_0_0_1,
    searchClass=mainClassSearch_v_0_0_1,
)


# ---------------------------------------------------------------------------
# Logging helpers
# ---------------------------------------------------------------------------

def _setup_logging(output_log_file):
    """Configure file and console logging, returning the console handler.

    A file handler writing to ``output_log_file`` (truncated on open) and a
    console handler writing to ``sys.stdout`` are attached to the root logger,
    and the root logger level is set to ``INFO``.

    Parameters
    ----------
    output_log_file : str
        Path of the log file to (over)write.

    Returns
    -------
    logging.StreamHandler
        The console handler that was attached to the root logger, so the
        caller can detach it again once the run is finished.

    Notes
    -----
    The file handler installed by the previous call to this function is
    detached and closed before the new one is attached, so each run logs to
    exactly one file.  The most recent file handler stays attached until the
    next call.
    """
    root_logger = logging.getLogger()

    # ``logging.basicConfig`` silently does nothing when the root logger
    # already has handlers, which made every run after the first one (or any
    # run inside an application that had configured logging) ignore
    # ``output_log_file``.  Managing the file handler explicitly avoids that.
    previous_file_handler = getattr(_setup_logging, "_file_handler", None)
    if previous_file_handler is not None:
        root_logger.removeHandler(previous_file_handler)
        previous_file_handler.close()

    # UTF-8 so that non-ASCII terms or paths in log messages cannot trigger
    # encoding errors on platforms with a narrower default encoding.
    file_handler = logging.FileHandler(output_log_file, mode="w", encoding="utf-8")
    file_handler.setFormatter(
        logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
    )
    root_logger.addHandler(file_handler)
    root_logger.setLevel(logging.INFO)
    # Remember the handler on the function object so the next call can
    # replace it without needing any module-level state.
    _setup_logging._file_handler = file_handler

    console_handler = logging.StreamHandler(sys.stdout)
    root_logger.addHandler(console_handler)
    return console_handler


def _teardown_logging(console_handler):
    """Detach the console handler that ``_setup_logging`` returned.

    Parameters
    ----------
    console_handler : logging.Handler
        The handler to remove from the root logger.
    """
    root_logger = logging.getLogger()
    root_logger.removeHandler(console_handler)


# ---------------------------------------------------------------------------
# Mode 1: Task-agnostic assessment
# ---------------------------------------------------------------------------

def run_ontology_assessment(
    ttl_file,
    metrics,
    output_log_file="assessment.log",
    output_csv_file="assessment_scores.csv",
):
    """Run task-agnostic metrics on a single ontology (Mode 1).

    Each requested metric is looked up in ``METRIC_DISPATCHER`` and called
    with ``ttl_file``.  Unknown metric names are skipped with a warning, and a
    metric that raises is recorded with an ``"Error: ..."`` status instead of
    aborting the run.  One row per metric is written to ``output_csv_file``.

    Parameters
    ----------
    ttl_file : str
        Path to the input Turtle (.ttl) ontology file.
    metrics : list of str or str
        Metric names to execute, or ``"all"`` to run every metric in
        ``METRIC_DISPATCHER``.
    output_log_file : str, optional
        Output log file path.
    output_csv_file : str, optional
        Output CSV file path.

    Raises
    ------
    ValueError
        If ``metrics`` is neither ``"all"`` nor a list, set, or tuple.
    """
    run_all = metrics == "all"
    # Validate before touching the logging configuration so that a bad
    # argument cannot leave a console handler attached to the root logger.
    if not run_all and not isinstance(metrics, (list, set, tuple)):
        raise ValueError(
            "The 'metrics' argument must be a list of metric names or the string 'all'."
        )

    console = _setup_logging(output_log_file)
    try:
        if run_all:
            metrics_to_run = list(METRIC_DISPATCHER)
            logging.info("Running all available metrics.")
        else:
            metrics_to_run = list(metrics)

        logging.info(f"--- Starting ontology assessment for: {ttl_file} ---")
        # str() keeps this log line from raising on a non-string entry; such
        # an entry is then reported as "not found" by the metric runner.
        logging.info(f"Metrics to run: {', '.join(str(m) for m in metrics_to_run)}")

        # Shared with the other assessment modes: runs each metric, logs the
        # outcome, and returns one CSV row dict per metric.
        results = _run_agnostic_metrics(ttl_file, metrics_to_run)

        _write_csv(results, output_csv_file)
        logging.info("--- Assessment Complete ---")
    finally:
        # Always detach the console handler, otherwise later runs in the same
        # process would print every message more than once.
        _teardown_logging(console)
# ---------------------------------------------------------------------------
# Mode 2: Task-specific Web ontology assessment
# ---------------------------------------------------------------------------
def run_web_ontology_assessment(
    ttl_file,
    questions,
    domain_prefixes,
    knowledge_graph,
    domain_ns_fragments=None,
    metrics=None,
    output_log_file="assessment.log",
    output_csv_file="assessment_scores.csv",
):
    """Assess a Web ontology against KGQA benchmark queries (Mode 2).

    Runs the task-based Relevance/Accuracy assessment using competency
    queries drawn from a knowledge-graph question-answering benchmark (e.g.,
    LC-QuAD over DBpedia).  Optionally runs task-agnostic metrics as well.
    All result rows are written to ``output_csv_file``.

    Parameters
    ----------
    ttl_file : str
        Path to the ontology Turtle file.
    questions : str or list of str
        Path to a JSON/Markdown file of SPARQL queries, or a list of raw
        SPARQL query strings.
    domain_prefixes : list of str
        Namespace prefixes used in the SPARQL queries (e.g., ``["dbo"]``).
    knowledge_graph : str
        Path to the knowledge-graph file (Turtle/RDF) used for validation
        context.
    domain_ns_fragments : list of str or None, optional
        Namespace URI fragments to restrict domain-term filtering.
    metrics : list of str or None, optional
        Task-agnostic metric names to run alongside the task-based
        assessment.  Pass ``"all"`` for every available metric.
    output_log_file : str, optional
        Output log file path.
    output_csv_file : str, optional
        Output CSV file path.

    Returns
    -------
    dict
        The result dictionary from ``task_based_metric_v_0_0_1``.
    """
    console = _setup_logging(output_log_file)
    try:
        logging.info("--- Mode 2: Task-specific Web Ontology Assessment ---")
        logging.info(f"Ontology: {ttl_file}")
        # NOTE(review): knowledge_graph is only logged here; it is not
        # forwarded to task_based_metric_v_0_0_1 -- confirm that is intended.
        logging.info(f"Knowledge graph: {knowledge_graph}")

        result = task_based_metric_v_0_0_1(
            ttl_file=ttl_file,
            questions=questions,
            domain_prefixes=domain_prefixes,
            domain_ns_fragments=domain_ns_fragments,
        )

        _log_task_based_result(result)
        results = _task_based_result_to_rows(result)

        if metrics:
            logging.info("--- Running task-agnostic metrics ---")
            results.extend(_run_agnostic_metrics(ttl_file, metrics))

        _write_csv(results, output_csv_file)
        logging.info("--- Assessment Complete ---")
        return result
    finally:
        # Detach the console handler even when the assessment raises, so a
        # failed run cannot cause duplicated console output in later runs.
        _teardown_logging(console)
# ---------------------------------------------------------------------------
# Mode 3 & 4: Task-based Scientific / Cross-Domain assessment
# ---------------------------------------------------------------------------
def run_task_based_assessment(
    ttl_files,
    questions,
    domain_prefixes,
    domain_ns_fragments=None,
    metrics=None,
    output_log_file="assessment.log",
    output_csv_file="assessment_scores.csv",
):
    """Assess one or more ontologies against competency questions (Modes 3/4).

    When a single ontology is provided this corresponds to Mode 3 (task-based
    scientific assessment).  When multiple ontologies are provided they are
    merged and evaluated jointly, corresponding to Mode 4 (cross-domain
    assessment).  If ``metrics`` is given, the task-agnostic metrics are run
    once per ontology file and their rows are appended to the CSV output.

    Parameters
    ----------
    ttl_files : str or list of str
        Path(s) to Turtle (.ttl) ontology file(s).  A single path is accepted
        and will be wrapped in a list internally.
    questions : str or list of str
        Path to a JSON/Markdown file of SPARQL queries, or a list of raw
        SPARQL query strings.
    domain_prefixes : list of str
        Namespace prefixes used in the SPARQL queries (e.g., ``["mds"]``).
    domain_ns_fragments : list of str or None, optional
        Namespace URI fragments to restrict domain-term filtering.
    metrics : list of str or None, optional
        Task-agnostic metric names to run alongside the task-based
        assessment.  Pass ``"all"`` for every available metric.
    output_log_file : str, optional
        Output log file path.
    output_csv_file : str, optional
        Output CSV file path.

    Returns
    -------
    dict
        The result dictionary from ``task_based_metric_v_0_0_1``.
    """
    if isinstance(ttl_files, (str, Path)):
        ttl_files = [ttl_files]

    console = _setup_logging(output_log_file)
    try:
        if len(ttl_files) > 1:
            logging.info("--- Mode 4: Cross-Domain Ontology Assessment ---")
        else:
            logging.info("--- Mode 3: Task-based Scientific Ontology Assessment ---")
        logging.info(f"Ontologies: {', '.join(str(f) for f in ttl_files)}")

        result = task_based_metric_v_0_0_1(
            ttl_file=ttl_files,
            questions=questions,
            domain_prefixes=domain_prefixes,
            domain_ns_fragments=domain_ns_fragments,
        )

        _log_task_based_result(result)
        results = _task_based_result_to_rows(result)

        if metrics:
            logging.info("--- Running task-agnostic metrics ---")
            for f in ttl_files:
                logging.info(f"--- Task-agnostic metrics for: {f} ---")
                # The metric functions are handed a plain string path.
                results.extend(_run_agnostic_metrics(str(f), metrics))

        _write_csv(results, output_csv_file)
        logging.info("--- Assessment Complete ---")
        return result
    finally:
        # Detach the console handler even when the assessment raises, so a
        # failed run cannot cause duplicated console output in later runs.
        _teardown_logging(console)
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------

def _log_task_based_result(result):
    """Log the task-based Relevance/Accuracy results.

    Parameters
    ----------
    result : dict
        Task-based result mapping.  The keys read here are ``relevance``,
        ``accuracy``, ``T_o_count``, ``T_a_count``, ``intersection``,
        ``missing_from_onto`` and ``unused_in_onto``.
    """
    logging.info(f"Relevance (Recall): {result['relevance']:.4f}")
    logging.info(f"Accuracy (Precision): {result['accuracy']:.4f}")
    logging.info(f"Ontology terms (T_o): {result['T_o_count']}")
    logging.info(f"Task terms (T_a): {result['T_a_count']}")
    logging.info(f"Intersection: {result['intersection']}")
    # The two term collections are only reported when non-empty; missing
    # terms are listed in sorted order, unused terms only as a count.
    if result["missing_from_onto"]:
        logging.info(
            f"Missing from ontology: {', '.join(sorted(result['missing_from_onto']))}"
        )
    if result["unused_in_onto"]:
        logging.info(
            f"Unused ontology terms: {len(result['unused_in_onto'])} terms"
        )


def _task_based_result_to_rows(result):
    """Convert a task-based result dict to CSV-compatible row dicts.

    Parameters
    ----------
    result : dict
        Task-based result mapping; reads ``relevance``, ``accuracy``,
        ``T_o_count``, ``T_a_count`` and ``intersection``.

    Returns
    -------
    list of dict
        Five rows with the keys ``Metric``, ``Score`` and ``Status``.
        Relevance and Accuracy are formatted to four decimal places; the
        counts are passed through unchanged.
    """
    return [
        {"Metric": "Relevance", "Score": f"{result['relevance']:.4f}", "Status": "Success"},
        {"Metric": "Accuracy", "Score": f"{result['accuracy']:.4f}", "Status": "Success"},
        {"Metric": "T_o_count", "Score": result["T_o_count"], "Status": "Success"},
        {"Metric": "T_a_count", "Score": result["T_a_count"], "Status": "Success"},
        {"Metric": "Intersection", "Score": result["intersection"], "Status": "Success"},
    ]


def _run_agnostic_metrics(ttl_file, metrics):
    """Run task-agnostic metrics and return a list of result row dicts.

    Parameters
    ----------
    ttl_file : str
        Path of the ontology file handed to every metric function.
    metrics : list, set, tuple, str or None
        Metric names to run, or ``"all"`` for every entry of
        ``METRIC_DISPATCHER``.  ``None`` or an empty value runs nothing.  Any
        other value is unsupported: nothing is run and a warning is logged.

    Returns
    -------
    list of dict
        One row per executed metric with the keys ``Metric``, ``Score`` and
        ``Status``.  Unknown metric names are skipped with a warning; a
        metric that raises yields a row with score ``"N/A"`` and an
        ``"Error: ..."`` status.
    """
    if metrics == "all":
        metrics_to_run = list(METRIC_DISPATCHER)
    elif isinstance(metrics, (list, set, tuple)):
        metrics_to_run = list(metrics)
    else:
        # Previously an unsupported value (for example a single metric name
        # passed as a bare string) was dropped without any trace.  Still run
        # nothing, but make the mistake visible in the log.
        if metrics:
            logging.warning(
                f"Unsupported 'metrics' argument {metrics!r}: expected a list of "
                "metric names or the string 'all'. No task-agnostic metrics were run."
            )
        metrics_to_run = []

    rows = []
    for metric_name in metrics_to_run:
        if metric_name not in METRIC_DISPATCHER:
            logging.warning(f"Metric '{metric_name}' not found. Skipping.")
            continue

        metric_function = METRIC_DISPATCHER[metric_name]
        logging.info(f"--- Running Metric: {metric_name} ---")
        try:
            score = metric_function(ttl_file)
        except Exception as e:
            # Deliberately broad: one failing metric must not abort the
            # remaining ones; the failure is logged and recorded in its row.
            logging.error(f"Metric '{metric_name}' failed with an error: {e}", exc_info=True)
            rows.append({"Metric": metric_name, "Score": "N/A", "Status": f"Error: {e}"})
        else:
            logging.info(f"Metric '{metric_name}' completed successfully.")
            rows.append({"Metric": metric_name, "Score": score, "Status": "Success"})
    return rows


def _write_csv(results, output_csv_file):
    """Write a list of result row dicts to a CSV file.

    Parameters
    ----------
    results : list of dict
        Rows with the keys ``Metric``, ``Score`` and ``Status``.
    output_csv_file : str
        Destination path; the file is overwritten and encoded as UTF-8.

    Notes
    -----
    An I/O failure is logged as an error rather than raised, so a CSV that
    cannot be written does not abort the assessment.
    """
    fieldnames = ["Metric", "Score", "Status"]
    try:
        # newline="" lets the csv module control line endings itself.
        with open(output_csv_file, "w", newline="", encoding="utf-8") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(results)
        logging.info(f"--- Successfully wrote results to {output_csv_file} ---")
    except IOError as e:
        logging.error(f"Failed to write to CSV file {output_csv_file}: {e}")