Source code for better_lbnl_os.core.benchmarking

"""Core benchmarking engine for building energy performance analysis.

This module provides pure, framework-agnostic functions for benchmarking building
energy performance against reference statistics. It handles the comparison of
change-point model coefficients and provides performance ratings and targets.
"""

import logging

import numpy as np

from better_lbnl_os.constants.building_types import BuildingSpaceType
from better_lbnl_os.core.changepoint import ChangePointModelResult
from better_lbnl_os.models.benchmarking import (
    BenchmarkResult,
    BenchmarkStatistics,
    CoefficientBenchmarkResult,
    EnergyTypeBenchmarkResult,
    EnergyTypeBenchmarkStatistics,
)
from better_lbnl_os.utils.statistics import (
    assign_performance_rating,
    calculate_coefficient_statistics,
    calculate_percentile_from_z_score,
    calculate_z_score,
)

logger = logging.getLogger(__name__)


def create_statistics_from_models(
    change_point_models: list[ChangePointModelResult], building_ids: list[str] | None = None
) -> BenchmarkStatistics:
    """Create benchmark statistics from a collection of change-point models.

    Each model is sorted into an "electricity" and/or a "fossil fuel" bucket
    using a simple heuristic (see the comments in the loop), then every
    coefficient of every bucket is summarized with
    ``calculate_coefficient_statistics``.

    Args:
        change_point_models: List of fitted change-point models.
        building_ids: Optional list of building identifiers, matched to the
            models by position and used only for debug logging. Models
            without a matching entry are labelled ``building_<index>``.

    Returns:
        BenchmarkStatistics holding one EnergyTypeBenchmarkStatistics for
        ``ELECTRICITY`` and one for ``FOSSIL_FUEL``.

    Raises:
        ValueError: If ``change_point_models`` is empty.
    """
    if not change_point_models:
        raise ValueError("At least one change-point model must be provided")

    coefficient_names = (
        "heating_slope",
        "heating_change_point",
        "baseload",
        "cooling_change_point",
        "cooling_slope",
    )

    # Collect coefficient values by energy type
    electricity_coeffs = {name: [] for name in coefficient_names}
    fossil_fuel_coeffs = {name: [] for name in coefficient_names}

    # Extract coefficients from each model
    for i, model in enumerate(change_point_models):
        building_label = (
            building_ids[i] if building_ids and i < len(building_ids) else f"building_{i}"
        )

        # For simplicity, assume ELECTRICITY models have cooling dominance
        # and FOSSIL_FUEL models have heating dominance.
        # In a real implementation, this would be determined by model type
        # or other criteria.
        is_electricity = model.cooling_slope is not None and model.cooling_slope > 0
        is_fossil_fuel = model.heating_slope is not None and model.heating_slope < 0

        # The two tests are independent, so one model may feed both buckets
        # or neither of them.
        # NOTE(review): coefficients that are None on a selected model are
        # appended as-is; confirm calculate_coefficient_statistics tolerates
        # None entries.
        if is_electricity:
            for name in coefficient_names:
                electricity_coeffs[name].append(getattr(model, name))
        if is_fossil_fuel:
            for name in coefficient_names:
                fossil_fuel_coeffs[name].append(getattr(model, name))

        logger.debug(
            "Model for %s used for statistics: electricity=%s, fossil_fuel=%s",
            building_label,
            is_electricity,
            is_fossil_fuel,
        )

    # Create statistics for each energy type
    electricity_stats = EnergyTypeBenchmarkStatistics(
        **{
            name: calculate_coefficient_statistics(values)
            for name, values in electricity_coeffs.items()
        }
    )
    fossil_fuel_stats = EnergyTypeBenchmarkStatistics(
        **{
            name: calculate_coefficient_statistics(values)
            for name, values in fossil_fuel_coeffs.items()
        }
    )

    return BenchmarkStatistics(ELECTRICITY=electricity_stats, FOSSIL_FUEL=fossil_fuel_stats)
def get_target_coefficient_value(
    coefficient_name: str,
    current_value: float,
    median: float,
    stdev: float,
    savings_target: str = "NOMINAL",
) -> float:
    """Return the coefficient value a building should aim for.

    The goal is derived from the reference ``median`` and ``stdev`` and then
    clamped so that it is never worse than ``current_value``.

    Args:
        coefficient_name: Name of the coefficient. ``"cooling_change_point"``
            and ``"heating_slope"`` are treated as "larger is better"; every
            other name is treated as "smaller is better".
        current_value: Current coefficient value.
        median: Reference median.
        stdev: Reference standard deviation.
        savings_target: ``"CONSERVATIVE"``, ``"NOMINAL"`` or ``"AGGRESSIVE"``.
            Any other string is handled like ``"AGGRESSIVE"``.

    Returns:
        Target coefficient value.
    """
    larger_is_better = coefficient_name in ("cooling_change_point", "heating_slope")

    if savings_target == "NOMINAL":
        goal = median
    elif savings_target == "CONSERVATIVE":
        # One standard deviation on the "easy" side of the median.
        goal = median - stdev if larger_is_better else median + stdev
    else:
        # AGGRESSIVE: half a standard deviation on the "hard" side.
        goal = median + stdev / 2 if larger_is_better else median - stdev / 2

    # Don't suggest worse performance than current
    if larger_is_better:
        return max(current_value, goal)
    return min(current_value, goal)
def benchmark_coefficient(
    coefficient_name: str,
    coefficient_value: float | None,
    median: float | None,
    stdev: float | None,
    savings_target: str,
    floor_area: float,
) -> CoefficientBenchmarkResult:
    """Benchmark a single coefficient against reference statistics.

    Args:
        coefficient_name: Name of the coefficient. ``"cooling_change_point"``
            and ``"heating_slope"`` are treated as "larger is better"; every
            other name is treated as "smaller is better".
        coefficient_value: Current coefficient value, or None if the model
            has no such coefficient.
        median: Reference median, or None if unavailable.
        stdev: Reference standard deviation, or None if unavailable.
        savings_target: Savings target level, forwarded to
            ``get_target_coefficient_value``.
        floor_area: Building floor area, used to scale the coefficient.

    Returns:
        CoefficientBenchmarkResult. When ``coefficient_value``, ``median`` or
        ``stdev`` is None only the input fields (and the area-scaled value,
        if computable) are populated; otherwise percentile, rating, the three
        target levels and the target value are filled in as well.
    """
    logger.debug(
        "Benchmarking %s: value=%s, median=%s, stdev=%s",
        coefficient_name,
        coefficient_value,
        median,
        stdev,
    )

    result = CoefficientBenchmarkResult(
        coefficient_value=coefficient_value,
        # Test against None explicitly: 0.0 is a legitimate coefficient and
        # must scale to 0.0 rather than being reported as missing.
        coefficient_value_with_area=(
            coefficient_value * floor_area if coefficient_value is not None else None
        ),
        sample_median=median,
        sample_standard_deviation=stdev,
    )

    # Return early if we don't have enough data
    if coefficient_value is None or median is None or stdev is None:
        return result

    # Calculate z-score and percentile
    z_score = calculate_z_score(coefficient_value, median, stdev)

    if coefficient_name in ("cooling_change_point", "heating_slope"):
        # Larger values are better: the percentile uses the z-score as-is,
        # while the rating helper receives the negated z-score.
        percentile = calculate_percentile_from_z_score(z_score)
        rating_z_score = -z_score

        # Calculate target levels
        conservative_level = median - stdev
        nominal_level = median
        aggressive_level = median + stdev / 2
    else:
        # Smaller values are better: the percentile uses the negated z-score
        # and the rating helper receives the z-score as-is.
        percentile = calculate_percentile_from_z_score(-z_score)
        rating_z_score = z_score

        # Calculate target levels
        conservative_level = median + stdev
        nominal_level = median
        aggressive_level = median - stdev / 2

    # Assign performance rating
    rating = assign_performance_rating(rating_z_score)

    # Calculate target value (never worse than the current value)
    target_value = get_target_coefficient_value(
        coefficient_name, coefficient_value, median, stdev, savings_target
    )

    # Update result
    result.percentile = percentile
    result.rating = rating
    result.conservative_level = conservative_level
    result.nominal_level = nominal_level
    result.aggressive_level = aggressive_level
    result.target_value = target_value

    return result
def benchmark_building(
    change_point_results: dict[str, ChangePointModelResult],
    benchmark_statistics: BenchmarkStatistics,
    floor_area: float,
    savings_target: str = "NOMINAL",
    building_id: str | None = None,
) -> BenchmarkResult:
    """Compare a building's change-point models with reference statistics.

    Args:
        change_point_results: Mapping of energy type (``"ELECTRICITY"`` or
            ``"FOSSIL_FUEL"``) to the fitted change-point result. Entries
            with any other key are skipped with a warning.
        benchmark_statistics: Reference statistics to compare against.
        floor_area: Building floor area; must be positive.
        savings_target: Savings target level ("CONSERVATIVE", "NOMINAL",
            "AGGRESSIVE").
        building_id: Optional building identifier.

    Returns:
        BenchmarkResult with one EnergyTypeBenchmarkResult per benchmarked
        energy type.

    Raises:
        ValueError: If ``change_point_results`` or ``benchmark_statistics``
            is empty/missing, or ``floor_area`` is not positive.
    """
    # Validate inputs up front, in the same order the errors are documented.
    if not change_point_results:
        raise ValueError("At least one change-point result must be provided")
    if not benchmark_statistics:
        raise ValueError("Benchmark statistics must be provided")
    if floor_area <= 0:
        raise ValueError("Floor area must be positive")

    logger.info(f"Benchmarking building {building_id or 'unknown'}")

    outcome = BenchmarkResult(
        building_id=building_id, floor_area=floor_area, savings_target=savings_target
    )

    supported_energy_types = ("ELECTRICITY", "FOSSIL_FUEL")
    coefficient_names = (
        "heating_slope",
        "heating_change_point",
        "baseload",
        "cooling_change_point",
        "cooling_slope",
    )

    for energy_type, model in change_point_results.items():
        if energy_type not in supported_energy_types:
            logger.warning(f"Unknown energy type: {energy_type}")
            continue

        # Reference statistics for this energy type
        reference = getattr(benchmark_statistics, energy_type, None)
        if not reference:
            logger.warning(f"No benchmark statistics for {energy_type}")
            continue

        per_energy = EnergyTypeBenchmarkResult()

        # Read every coefficient off the model before benchmarking any of them.
        model_values = {name: getattr(model, name) for name in coefficient_names}

        for name, value in model_values.items():
            reference_stats = getattr(reference, name, None)
            if reference_stats:
                benchmarked = benchmark_coefficient(
                    coefficient_name=name,
                    coefficient_value=value,
                    median=reference_stats.median,
                    stdev=reference_stats.stdev,
                    savings_target=savings_target,
                    floor_area=floor_area,
                )
                setattr(per_energy, name, benchmarked)

        # Attach the per-energy result under the energy type's name
        setattr(outcome, energy_type, per_energy)

    return outcome
def calculate_portfolio_statistics(building_results: list[BenchmarkResult]) -> dict[str, float]:
    """Aggregate individual building benchmarks into portfolio-level metrics.

    Args:
        building_results: List of benchmark results for buildings in portfolio.

    Returns:
        Empty dict when ``building_results`` is empty. Otherwise a dict with
        ``total_buildings`` and ``total_floor_area`` plus, for each energy
        type that has data, ``<energy_type>_ratings`` (counts of "Good",
        "Typical" and "Poor") and ``<energy_type>_avg_percentile``.
    """
    if not building_results:
        return {}

    summary = {
        "total_buildings": len(building_results),
        # Results without a (truthy) floor area do not contribute to the total.
        "total_floor_area": sum(
            entry.floor_area for entry in building_results if entry.floor_area
        ),
    }

    # Performance distribution per energy type
    for energy_type in ("ELECTRICITY", "FOSSIL_FUEL"):
        key_prefix = energy_type.lower()
        collected_ratings = []
        collected_percentiles = []

        for entry in building_results:
            if rating := entry.get_overall_rating(energy_type):
                collected_ratings.append(rating)

            if (percentile := entry.get_average_percentile(energy_type)) is not None:
                collected_percentiles.append(percentile)

        if collected_ratings:
            summary[f"{key_prefix}_ratings"] = {
                label: collected_ratings.count(label) for label in ("Good", "Typical", "Poor")
            }

        if collected_percentiles:
            summary[f"{key_prefix}_avg_percentile"] = np.mean(collected_percentiles)

    return summary
# Global loader instance for convenience _default_loader = None
def get_reference_statistics(
    country_code: str, building_type: str | BuildingSpaceType, custom_data_path: str | None = None
) -> BenchmarkStatistics | None:
    """Look up reference statistics for a country and building type.

    A loader built for a ``custom_data_path`` is used for that call only. The
    loader built without a custom path is kept in the module-level
    ``_default_loader`` and reused by later calls.

    Args:
        country_code: ISO country code (e.g., 'US', 'MX').
        building_type: Building type enum or string. Strings are converted
            with ``BuildingSpaceType.from_benchmark_id``.
        custom_data_path: Optional path to custom JSON manifest.

    Returns:
        Whatever the loader's ``get_statistics`` returns for the request, or
        None when a string ``building_type`` cannot be converted.
    """
    from better_lbnl_os.data.loader import ReferenceStatisticsLoader

    global _default_loader

    if custom_data_path:
        # Custom manifests always get a fresh, uncached loader.
        loader = ReferenceStatisticsLoader(custom_data_path)
    else:
        if _default_loader is None:
            _default_loader = ReferenceStatisticsLoader(custom_data_path)
        loader = _default_loader

    if isinstance(building_type, str):
        try:
            building_type = BuildingSpaceType.from_benchmark_id(building_type)
        except ValueError:
            logger.error(f"Invalid building type: {building_type}")
            return None

    return loader.get_statistics(country_code, building_type)
def benchmark_with_reference(
    change_point_results: dict[str, ChangePointModelResult],
    floor_area: float,
    country_code: str,
    building_type: str | BuildingSpaceType,
    custom_statistics_path: str | None = None,
    savings_target: str = "NOMINAL",
    building_id: str | None = None,
) -> BenchmarkResult:
    """Benchmark a building against looked-up reference statistics.

    Fetches the statistics with ``get_reference_statistics`` (built-in data,
    or a custom manifest when ``custom_statistics_path`` is given) and hands
    them to ``benchmark_building``.

    Args:
        change_point_results: Dictionary mapping energy types to change-point
            results.
        floor_area: Building floor area.
        country_code: ISO country code (e.g., 'US', 'MX').
        building_type: Building type enum or string.
        custom_statistics_path: Optional path to custom JSON manifest.
        savings_target: Savings target level ("CONSERVATIVE", "NOMINAL",
            "AGGRESSIVE").
        building_id: Optional building identifier.

    Returns:
        BenchmarkResult with complete comparison metrics.

    Raises:
        ValueError: If no reference statistics are available or inputs are
            invalid.
    """
    reference_stats = get_reference_statistics(
        country_code, building_type, custom_statistics_path
    )
    if reference_stats:
        return benchmark_building(
            change_point_results, reference_stats, floor_area, savings_target, building_id
        )

    raise ValueError(f"No reference statistics available for {country_code}/{building_type}")
def list_available_reference_statistics(
    custom_data_path: str | None = None,
) -> list[tuple[str, BuildingSpaceType]]:
    """Enumerate the reference statistics a loader can provide.

    A new ``ReferenceStatisticsLoader`` is created on every call.

    Args:
        custom_data_path: Optional path to custom JSON manifest.

    Returns:
        List of (country_code, building_type) tuples.
    """
    from better_lbnl_os.data.loader import ReferenceStatisticsLoader

    return ReferenceStatisticsLoader(custom_data_path).list_available()
# Names exported by ``from better_lbnl_os.core.benchmarking import *``.
__all__ = [
    "benchmark_building",
    "benchmark_coefficient",
    "benchmark_with_reference",
    "calculate_portfolio_statistics",
    "create_statistics_from_models",
    "get_reference_statistics",
    "get_target_coefficient_value",
    "list_available_reference_statistics",
]