import logging

logger = logging.getLogger(__name__)


def calculate_ceph_capacity(replication_type, replicas=3, k=0, m=0, nodes=None,
                            min_size=2, storage_unit='GB'):
    """
    Calculate the maximum allowed storage usage for a Ceph cluster considering
    OSD and node failures.

    Args:
        replication_type (str): 'replication' selects a replicated pool; any
            other value (conventionally 'erasure_coding') is treated as an
            erasure-coded pool.
        replicas (int): Number of replicas for replication pools (default: 3).
        k (int): Number of data chunks for EC.
        m (int): Number of coding chunks for EC.
        nodes (list): List of dictionaries with 'osd_count' and 'osd_size_gb'
            keys, e.g. [{'osd_count': 4, 'osd_size_gb': 1000}, ...]. Missing
            keys default to 0. Sizes are always given in GB.
        min_size (int): Minimum number of replicas for I/O operations
            (default: 2).
        storage_unit (str): Display unit, either 'GB' or 'TB' (default: 'GB').
            It only affects the values that are converted for display
            ('raw_total', 'largest_node_gb', the '*_after_*' values and the
            sizes embedded in the info strings).

    Returns:
        dict: max_usage_percent, max_usage_gb, max_usage_tb, raw_total and
        additional information about OSD/node fault tolerance. When ``nodes``
        is empty or None a reduced dictionary describing an empty cluster is
        returned.

    Raises:
        ValueError: If ``replicas`` or ``min_size`` is not positive for a
            replicated pool, or if ``k`` or ``m`` is not positive for an
            erasure-coded pool.
    """
    if not nodes:
        return {
            'max_usage_percent': 0,
            'max_usage_gb': 0,
            'max_usage_tb': 0,
            'raw_total': 0,
            'node_failure_tolerance': False,
            'node_failure_info': 'No nodes in the cluster',
            'storage_unit': storage_unit
        }

    is_replication = replication_type == 'replication'

    # Validate up front so that bad parameters surface as ValueError instead
    # of a ZeroDivisionError somewhere in the middle of the calculation.
    if is_replication:
        if replicas <= 0 or min_size <= 0:
            raise ValueError(
                "Invalid replication parameters: replicas and min_size must be positive"
            )
    elif k <= 0 or m <= 0:
        raise ValueError(
            "Invalid Erasure Coding parameters: k and m must be positive "
            "and their sum must be greater than 0"
        )

    def usable(raw_gb):
        """Usable capacity of a healthy pool for the given raw capacity."""
        if is_replication:
            return raw_gb / replicas
        return raw_gb * (k / (k + m))

    def display(value_gb):
        """Round a GB value, converting to TB first when requested."""
        if storage_unit == 'TB':
            return round(value_gb / 1024, 2)
        return round(value_gb, 2)

    unit_display = "TB" if storage_unit == "TB" else "GB"
    node_count = len(nodes)

    # Per-node figures and total raw storage
    osd_counts = [int(node.get('osd_count', 0)) for node in nodes]
    osd_sizes = [float(node.get('osd_size_gb', 0)) for node in nodes]
    node_capacities = [count * size for count, size in zip(osd_counts, osd_sizes)]
    raw_total_gb = sum(node_capacities)

    # Largest node (worst-case scenario during failure)
    largest_node_capacity = max(node_capacities)

    # Usable capacity without failure
    usable_capacity_gb = usable(raw_total_gb)

    # Recommended maximum utilization for normal case (without failure)
    max_recommended_usage_percent = 80

    # OSD utilization using the formula x = (s * p) / (s + 1), where
    # s = number of OSDs per server and p = percentage of total utilization.
    # The node with the most OSDs is used so that usage is not
    # underestimated when nodes differ in size.
    osds_per_server = max(osd_counts)
    osd_usage_percent = (osds_per_server * max_recommended_usage_percent) / (osds_per_server + 1)

    # Largest OSD that actually exists: sizes declared on nodes without any
    # OSDs contribute no capacity and must not be subtracted.
    largest_osd_size = max(
        (size for count, size in zip(osd_counts, osd_sizes) if count > 0),
        default=0
    )

    # Usable capacity after failure of the largest OSD
    usable_after_osd_failure = usable(raw_total_gb - largest_osd_size)

    # Maximum safe usage considering OSD failure
    max_usage_gb = min(
        usable_capacity_gb * (osd_usage_percent / 100),  # OSD utilization based on formula
        usable_after_osd_failure * 0.8                   # 80% of capacity after OSD failure
    )
    # TB value is always calculated for display
    max_usage_tb = max_usage_gb / 1024

    # Number of node failures the pool layout can tolerate. Clamped to zero
    # because undersized clusters would otherwise yield a negative count.
    if is_replication:
        max_failure_nodes = min(
            node_count - min_size,  # Maximum failures based on min_size
            replicas - min_size     # Maximum failures based on replication factor
        )
    else:
        max_failure_nodes = min(
            node_count - (k + 1),   # At least k+1 nodes must remain available
            m                       # Maximum number of coding chunks that can fail
        )
    max_failure_nodes = max(0, max_failure_nodes)

    # Worst case: the largest N nodes fail
    node_capacities_sorted = sorted(node_capacities, reverse=True)
    raw_after_max_failures_gb = raw_total_gb - sum(node_capacities_sorted[:max_failure_nodes])

    # Usable capacity after maximum tolerable failures
    if is_replication:
        usable_after_max_failures_gb = raw_after_max_failures_gb / min_size
    else:
        # 0 <= remaining_m <= m because of the clamp above. Zero remaining
        # coding chunks is a valid (fully degraded) state, and k > 0 keeps
        # the denominator positive.
        remaining_m = m - max_failure_nodes
        usable_after_max_failures_gb = raw_after_max_failures_gb * (k / (k + remaining_m))

    # Usable capacity after failure of the largest node
    raw_after_failure_gb = raw_total_gb - largest_node_capacity
    if is_replication:
        usable_after_failure_gb = raw_after_failure_gb / min_size
    else:
        usable_after_failure_gb = raw_after_failure_gb * (k / (k + m))

    # A node failure is tolerated when enough nodes remain and the data
    # stored at maximum usage still fits afterwards.
    enough_nodes = node_count >= min_size if is_replication else node_count > k
    node_failure_tolerance = enough_nodes and usable_after_failure_gb >= max_usage_gb

    # Check for multiple failures
    multi_failure_tolerance = False
    if max_failure_nodes > 0:
        multi_failure_tolerance = (
            usable_after_max_failures_gb >= max_usage_gb
            and node_count > max_failure_nodes
        )

    if node_failure_tolerance:
        if multi_failure_tolerance and max_failure_nodes > 1:
            node_failure_info = (
                f"The cluster can tolerate failure of up to {max_failure_nodes} nodes "
                f"(min_size={min_size})."
            )
        else:
            node_failure_info = (
                f"The cluster can tolerate failure of the largest node (min_size={min_size})."
            )
    else:
        required_nodes = min_size if is_replication else k + m - min(m, 1)
        if node_count <= required_nodes:
            node_failure_info = (
                f"CRITICAL: Too few nodes ({node_count}) for the configured min_size={min_size}. "
                f"At least {required_nodes + 1} nodes needed."
            )
        else:
            # A cluster without any capacity has no meaningful safe usage;
            # report 0% instead of dividing by zero.
            if usable_capacity_gb > 0:
                safe_usage_percent = round(
                    (usable_after_failure_gb / usable_capacity_gb) * 100 * 0.8
                )
            else:
                safe_usage_percent = 0
            node_failure_info = (
                f"WARNING: The cluster does not have enough free capacity to tolerate a failure of the largest node "
                f"({display(largest_node_capacity)} {unit_display}). "
                f"Maximum safe usage: {safe_usage_percent}%"
            )

    # Single OSD failure
    osd_failure_tolerance = False
    osd_failure_info = "No OSDs in the cluster"
    if any(count > 0 for count in osd_counts):
        osd_failure_tolerance = usable_after_osd_failure >= max_usage_gb
        if osd_failure_tolerance:
            osd_failure_info = (
                f"The cluster can tolerate failure of the largest OSD (min_size={min_size})."
            )
        else:
            osd_failure_info = (
                f"WARNING: The cluster does not have enough free capacity to tolerate failure of the largest OSD "
                f"({display(largest_osd_size)} {unit_display})."
            )

    # Return values with proper unit conversion
    return {
        'max_usage_percent': round(osd_usage_percent, 2),
        'max_usage_gb': round(max_usage_gb, 2),
        'max_usage_tb': round(max_usage_tb, 2),
        'raw_total': display(raw_total_gb),
        'node_failure_tolerance': node_failure_tolerance,
        'node_failure_info': node_failure_info,
        'multi_failure_tolerance': multi_failure_tolerance,
        'max_failure_nodes': max_failure_nodes,
        'osd_failure_tolerance': osd_failure_tolerance,
        'osd_failure_info': osd_failure_info,
        'largest_node_gb': display(largest_node_capacity),
        'raw_after_failure_gb': display(raw_after_failure_gb),
        'usable_after_failure_gb': display(usable_after_failure_gb),
        'raw_after_max_failures_gb': display(raw_after_max_failures_gb),
        'usable_after_max_failures_gb': display(usable_after_max_failures_gb),
        'min_size': min_size,
        'osds_per_server': osds_per_server,
        'storage_unit': storage_unit
    }