Ceph-Calculator/app/utils/ceph_calculator.py

import logging

logger = logging.getLogger(__name__)


def calculate_ceph_capacity(replication_type, replicas=3, k=0, m=0, nodes=None, min_size=2, storage_unit='GB'):
    """
    Calculate the maximum allowed storage usage for a Ceph cluster considering node failures.

    Args:
        replication_type (str): Either 'replication' or 'erasure_coding'
        replicas (int): Number of replicas for replication pools (default: 3)
        k (int): Number of data chunks for EC
        m (int): Number of coding chunks for EC
        nodes (list): List of dictionaries with 'osd_count' and 'osd_size_gb' keys,
            e.g. [{'osd_count': 4, 'osd_size_gb': 1000}, ...]
        min_size (int): Minimum number of replicas required for I/O operations (default: 2)
        storage_unit (str): Storage unit, either 'GB' or 'TB' (default: 'GB')

    Returns:
        dict: Dictionary with max_usage_percent, max_usage_gb, max_usage_tb, raw_total and
            additional information about fault tolerance
    """
    if nodes is None or len(nodes) == 0:
        return {
            'max_usage_percent': 0,
            'max_usage_gb': 0,
            'max_usage_tb': 0,
            'raw_total': 0,
            'node_failure_tolerance': False,
            'node_failure_info': 'No nodes in the cluster',
            'storage_unit': storage_unit
        }

    # Calculate total raw storage and the capacity of each node
    raw_total_gb = 0
    node_capacities = []
    for node_config in nodes:
        osd_count = int(node_config.get('osd_count', 0))
        osd_size_gb = float(node_config.get('osd_size_gb', 0))
        node_capacity = osd_count * osd_size_gb
        node_capacities.append(node_capacity)
        raw_total_gb += node_capacity
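    # Hypothetical running example used in the comments below: 6 nodes with
    # 4 x 1000 GB OSDs each give raw_total_gb = 6 * 4 * 1000 = 24000 GB.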

    # Largest node (worst-case scenario during a failure)
    largest_node_capacity = max(node_capacities) if node_capacities else 0

    # Calculate usable capacity without a failure
    if replication_type == 'replication':
        # For replication, usable storage = raw storage / number of replicas
        usable_capacity_gb = raw_total_gb / replicas
    else:  # Erasure Coding
        # For EC, usable storage = raw storage * (k / (k + m))
        if k <= 0 or m <= 0:
            raise ValueError("Invalid Erasure Coding parameters: k and m must both be positive")
        usable_capacity_gb = raw_total_gb * (k / (k + m))
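    # Running example: 24000 GB raw -> 24000 / 3 = 8000 GB usable with 3-way
    # replication, or 24000 * 4 / (4 + 2) = 16000 GB usable with EC k=4, m=2.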

    # Recommended maximum utilization for the normal case (without a failure)
    max_recommended_usage_percent = 80

    # Calculate OSD utilization using the formula x = (s × p) / (s + 1),
    # where s = number of OSDs per server and p = percentage of total utilization.
    # Use the node with the most OSDs for the utilization calculation to avoid
    # underestimating the usage when nodes differ in size.
    osds_per_server = max(
        (int(node.get('osd_count', 0)) for node in nodes), default=0
    ) if nodes else 0
    osd_usage_percent = (osds_per_server * max_recommended_usage_percent) / (osds_per_server + 1)
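    # Running example: with 4 OSDs per server, x = (4 * 80) / (4 + 1) = 64,
    # i.e. a 64% target utilization.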

    # Find the largest OSD size for calculating capacity after an OSD failure
    largest_osd_size = max((float(node.get('osd_size_gb', 0)) for node in nodes), default=0)

    # Calculate usable capacity after an OSD failure
    raw_after_osd_failure = raw_total_gb - largest_osd_size
    if replication_type == 'replication':
        usable_after_osd_failure = raw_after_osd_failure / replicas
    else:
        usable_after_osd_failure = raw_after_osd_failure * (k / (k + m))

    # Calculate the maximum safe usage considering an OSD failure
    max_usage_gb = min(
        usable_capacity_gb * (osd_usage_percent / 100),  # OSD utilization based on the formula above
        usable_after_osd_failure * 0.8  # 80% of the capacity after an OSD failure
    )
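    # Running example (replication): min(8000 * 0.64, (24000 - 1000) / 3 * 0.8)
    # = min(5120, 6133.33) = 5120 GB.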

    # Always calculate TB as well for display
    max_usage_tb = max_usage_gb / 1024

    # Calculate fault tolerance considering min_size
    if replication_type == 'replication':
        max_failure_nodes = min(
            len(nodes) - min_size,  # Maximum failures based on min_size
            replicas - min_size  # Maximum failures based on the replication factor
        )
    else:  # Erasure Coding
        max_failure_nodes = min(
            len(nodes) - (k + 1),  # At least k + 1 nodes must remain available
            m  # Maximum number of coding chunks that can fail
        )
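    # Running example: replication with 6 nodes, replicas=3, min_size=2 gives
    # min(6 - 2, 3 - 2) = 1 tolerable node failure; EC 4+2 gives min(6 - 5, 2) = 1.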

    # Sort nodes by size in descending order for the worst-case analysis
    node_capacities_sorted = sorted(node_capacities, reverse=True)

    # Capacity after failure of the largest N nodes
    raw_after_max_failures_gb = raw_total_gb
    for i in range(min(max_failure_nodes, len(node_capacities_sorted))):
        raw_after_max_failures_gb -= node_capacities_sorted[i]

    # Usable capacity after the maximum number of tolerable failures
    if replication_type == 'replication':
        usable_after_max_failures_gb = raw_after_max_failures_gb / min_size
    else:  # Erasure Coding
        # If remaining_m drops to 0, every surviving chunk is a data chunk and the
        # factor k / (k + remaining_m) becomes 1; only a negative value indicates
        # an inconsistent configuration.
        remaining_m = m - max_failure_nodes
        if remaining_m < 0:
            raise ValueError("Invalid Erasure Coding configuration: more tolerated failures than coding chunks")
        usable_after_max_failures_gb = raw_after_max_failures_gb * (k / (k + remaining_m))
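    # Running example: losing the one tolerable node (4 * 1000 GB) leaves
    # 24000 - 4000 = 20000 GB raw; replication with min_size=2 gives 20000 / 2 = 10000 GB,
    # EC 4+2 with remaining_m = 1 gives 20000 * 4 / 5 = 16000 GB.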

    # Calculate usable capacity after failure of the largest node
    raw_after_failure_gb = raw_total_gb - largest_node_capacity
    if replication_type == 'replication':
        usable_after_failure_gb = raw_after_failure_gb / min_size
    else:  # Erasure Coding
        usable_after_failure_gb = raw_after_failure_gb * (k / (k + m))

    # Check whether there is enough storage space left after a node failure
    node_failure_tolerance = True

    # Check the minimum node requirements
    if replication_type == 'replication':
        if len(nodes) < min_size:
            node_failure_tolerance = False
        elif usable_after_failure_gb < max_usage_gb:
            node_failure_tolerance = False
    else:  # Erasure Coding
        if len(nodes) <= k:
            node_failure_tolerance = False
        elif usable_after_failure_gb < max_usage_gb:
            node_failure_tolerance = False
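    # Running example (replication): 10000 GB usable after the largest node fails
    # is above the 5120 GB usage cap, so the cluster stays node-failure tolerant.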

    # Check tolerance of multiple failures
    multi_failure_tolerance = False
    if max_failure_nodes > 0:
        multi_failure_tolerance = (
            usable_after_max_failures_gb >= max_usage_gb and
            len(nodes) > max_failure_nodes
        )

    # Maximum safe usage considering a possible node failure
    safe_usage_percent = 0
    safe_usage_gb = 0
    safe_usage_tb = 0
    node_failure_info = ""

    if node_failure_tolerance:
        safe_usage_percent = max_recommended_usage_percent
        safe_usage_gb = max_usage_gb
        safe_usage_tb = max_usage_tb
        if multi_failure_tolerance and max_failure_nodes > 1:
            node_failure_info = f"The cluster can tolerate failure of up to {max_failure_nodes} nodes (min_size={min_size})."
        else:
            node_failure_info = f"The cluster can tolerate failure of the largest node (min_size={min_size})."
    else:
        # Guard against division by zero when the cluster has no usable capacity
        safe_usage_percent = round((usable_after_failure_gb / usable_capacity_gb) * 100 * 0.8) if usable_capacity_gb > 0 else 0
        safe_usage_gb = usable_after_failure_gb * 0.8
        safe_usage_tb = safe_usage_gb / 1024
        if len(nodes) <= (min_size if replication_type == 'replication' else k + m - min(m, 1)):
            node_failure_info = f"CRITICAL: Too few nodes ({len(nodes)}) for the configured min_size={min_size}. "
            node_failure_info += f"At least {min_size + 1 if replication_type == 'replication' else k + m + 1 - min(m, 1)} nodes needed."
        else:
            # Unit for display
            unit_display = "TB" if storage_unit == "TB" else "GB"
            node_size_display = round(largest_node_capacity / 1024, 2) if storage_unit == "TB" else round(largest_node_capacity, 2)
            node_failure_info = (f"WARNING: The cluster does not have enough free capacity to tolerate a failure of the largest node "
                                 f"({node_size_display} {unit_display}). "
                                 f"Maximum safe usage: {safe_usage_percent}%")

    # Check tolerance of a single OSD failure
    osd_failure_tolerance = False
    osd_failure_info = "No OSDs in the cluster"
    if nodes and any(int(node.get('osd_count', 0)) > 0 for node in nodes):
        osd_failure_tolerance = usable_after_osd_failure >= max_usage_gb
        # Unit for display
        unit_display = "TB" if storage_unit == "TB" else "GB"
        osd_size_display = round(largest_osd_size / 1024, 2) if storage_unit == "TB" else round(largest_osd_size, 2)
        if osd_failure_tolerance:
            osd_failure_info = f"The cluster can tolerate failure of the largest OSD (min_size={min_size})."
        else:
            osd_failure_info = (f"WARNING: The cluster does not have enough free capacity to tolerate failure "
                                f"of the largest OSD ({osd_size_display} {unit_display}).")

    # Return values with proper unit conversion
    result = {
        'max_usage_percent': round(osd_usage_percent, 2),
        'max_usage_gb': round(max_usage_gb, 2),
        'max_usage_tb': round(max_usage_tb, 2),
        'raw_total': round(raw_total_gb / 1024, 2) if storage_unit == 'TB' else round(raw_total_gb, 2),
        'node_failure_tolerance': node_failure_tolerance,
        'node_failure_info': node_failure_info,
        'multi_failure_tolerance': multi_failure_tolerance,
        'max_failure_nodes': max_failure_nodes,
        'osd_failure_tolerance': osd_failure_tolerance,
        'osd_failure_info': osd_failure_info,
        'largest_node_gb': round(largest_node_capacity / 1024, 2) if storage_unit == 'TB' else round(largest_node_capacity, 2),
        'raw_after_failure_gb': round(raw_after_failure_gb / 1024, 2) if storage_unit == 'TB' else round(raw_after_failure_gb, 2),
        'usable_after_failure_gb': round(usable_after_failure_gb / 1024, 2) if storage_unit == 'TB' else round(usable_after_failure_gb, 2),
        'raw_after_max_failures_gb': round(raw_after_max_failures_gb / 1024, 2) if storage_unit == 'TB' else round(raw_after_max_failures_gb, 2),
        'usable_after_max_failures_gb': round(usable_after_max_failures_gb / 1024, 2) if storage_unit == 'TB' else round(usable_after_max_failures_gb, 2),
        'min_size': min_size,
        'osds_per_server': osds_per_server,
        'storage_unit': storage_unit
    }
    return result
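

if __name__ == '__main__':
    # Illustrative sketch only: this __main__ block and the node layout below are
    # hypothetical and not part of the original module; they merely show how
    # calculate_ceph_capacity() is called and what it returns.
    example_nodes = [{'osd_count': 4, 'osd_size_gb': 1000} for _ in range(6)]

    replication_result = calculate_ceph_capacity(
        'replication', replicas=3, nodes=example_nodes, min_size=2, storage_unit='GB'
    )
    print("Replication:", replication_result['max_usage_gb'], "GB usable -",
          replication_result['node_failure_info'])

    ec_result = calculate_ceph_capacity(
        'erasure_coding', k=4, m=2, nodes=example_nodes, min_size=2, storage_unit='GB'
    )
    print("Erasure coding:", ec_result['max_usage_gb'], "GB usable -",
          ec_result['node_failure_info'])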