import logging

logger = logging.getLogger(__name__)


def calculate_ceph_capacity(replication_type, replicas=3, k=0, m=0, nodes=None, min_size=2, storage_unit='GB'):
    """
    Calculate the maximum allowed storage usage for a Ceph cluster, taking node failures into account.

    Args:
        replication_type (str): Either 'replication' or 'erasure_coding'
        replicas (int): Number of replicas for replication pools (default: 3)
        k (int): Number of data chunks for erasure coding
        m (int): Number of coding chunks for erasure coding
        nodes (list): List of dictionaries with 'osd_count' and 'osd_size_gb' keys,
            e.g. [{'osd_count': 4, 'osd_size_gb': 1000}, ...]
        min_size (int): Minimum number of replicas required for I/O operations (default: 2)
        storage_unit (str): Storage unit for reported values, either 'GB' or 'TB' (default: 'GB')

    Returns:
        dict: Dictionary with max_usage_percent, max_usage_gb, max_usage_tb, raw_total and
            additional information about fault tolerance. Note that max_usage_gb is always
            reported in GB and max_usage_tb in TB; the remaining size values are converted
            to TB when storage_unit is 'TB', despite their '_gb' key suffix.
    """
    if not nodes:
        return {
            'max_usage_percent': 0,
            'max_usage_gb': 0,
            'max_usage_tb': 0,
            'raw_total': 0,
            'node_failure_tolerance': False,
            'node_failure_info': 'No nodes in the cluster',
            'storage_unit': storage_unit
        }

    # Calculate total raw storage and per-node capacities
    raw_total_gb = 0
    node_capacities = []

    for node_config in nodes:
        osd_count = int(node_config.get('osd_count', 0))
        osd_size_gb = float(node_config.get('osd_size_gb', 0))
        node_capacity = osd_count * osd_size_gb
        node_capacities.append(node_capacity)
        raw_total_gb += node_capacity

    # Largest node (worst case during a node failure)
    largest_node_capacity = max(node_capacities) if node_capacities else 0

    # Calculate usable capacity without any failure
    if replication_type == 'replication':
        # For replication, usable storage = raw storage / number of replicas
        if replicas <= 0:
            raise ValueError("Invalid replication factor: replicas must be positive")
        usable_capacity_gb = raw_total_gb / replicas
    else:  # Erasure Coding
        # For EC, usable storage = raw storage * (k / (k + m))
        if k <= 0 or m <= 0:
            raise ValueError("Invalid Erasure Coding parameters: k and m must be positive")
        usable_capacity_gb = raw_total_gb * (k / (k + m))
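
    # Worked example (illustrative numbers, not taken from a real cluster):
    # with 12000 GB raw, 3-way replication yields 12000 / 3 = 4000 GB usable,
    # while EC with k=4, m=2 yields 12000 * (4 / 6) = 8000 GB usable.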

    # Recommended maximum utilization in the normal case (no failure)
    max_recommended_usage_percent = 80

    # Calculate OSD utilization using the formula x = (s * p) / (s + 1),
    # where s = number of OSDs per server and p = target percentage of total
    # utilization. Use the node with the most OSDs so that utilization is not
    # underestimated when nodes differ in size.
    osds_per_server = max((int(node.get('osd_count', 0)) for node in nodes), default=0)
    osd_usage_percent = (osds_per_server * max_recommended_usage_percent) / (osds_per_server + 1)
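
    # For example (hypothetical values): with s = 4 OSDs per server and
    # p = 80, x = (4 * 80) / (4 + 1) = 64, i.e. a 64% target utilization.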

    # Find the largest OSD size for calculating capacity after an OSD failure
    largest_osd_size = max((float(node.get('osd_size_gb', 0)) for node in nodes), default=0)

    # Calculate usable capacity after a single OSD failure
    raw_after_osd_failure = raw_total_gb - largest_osd_size
    if replication_type == 'replication':
        usable_after_osd_failure = raw_after_osd_failure / replicas
    else:
        usable_after_osd_failure = raw_after_osd_failure * (k / (k + m))

    # Maximum safe usage considering an OSD failure
    max_usage_gb = min(
        usable_capacity_gb * (osd_usage_percent / 100),  # OSD utilization from the formula above
        usable_after_osd_failure * 0.8  # 80% of the capacity remaining after an OSD failure
    )
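
    # Continuing the example above (hypothetical numbers): 4000 GB usable at
    # 64% gives 2560 GB; losing a 1000 GB OSD leaves 11000 GB raw, i.e.
    # 3666.67 GB usable, and 80% of that is 2933.33 GB. The minimum of the
    # two, 2560 GB, becomes max_usage_gb.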

    # Always compute the TB value as well for display purposes
    max_usage_tb = max_usage_gb / 1024

    # Calculate fault tolerance, taking min_size into account
    if replication_type == 'replication':
        max_failure_nodes = min(
            len(nodes) - min_size,  # Maximum failures based on min_size
            replicas - min_size  # Maximum failures based on the replication factor
        )
    else:  # Erasure Coding
        max_failure_nodes = min(
            len(nodes) - (k + 1),  # At least k + 1 nodes must remain available
            m  # Maximum number of coding chunks that can fail
        )
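
    # For example (hypothetical cluster): with 5 nodes, replicas=3 and
    # min_size=2, max_failure_nodes = min(5 - 2, 3 - 2) = 1; with 8 nodes
    # and EC k=4, m=2, max_failure_nodes = min(8 - 5, 2) = 2.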

    # Sort node capacities in descending order for the worst-case analysis
    node_capacities_sorted = sorted(node_capacities, reverse=True)

    # Capacity after failure of the largest N nodes
    raw_after_max_failures_gb = raw_total_gb
    for i in range(min(max_failure_nodes, len(node_capacities_sorted))):
        raw_after_max_failures_gb -= node_capacities_sorted[i]

    # Usable capacity after the maximum tolerable number of failures
    if replication_type == 'replication':
        usable_after_max_failures_gb = raw_after_max_failures_gb / min_size
    else:  # Erasure Coding
        # Coding chunks lost with the failed nodes no longer add overhead;
        # remaining_m = 0 (all m coding chunks lost) is a valid degraded
        # state in which the surviving raw capacity maps 1:1 to usable space
        remaining_m = m - max(max_failure_nodes, 0)
        usable_after_max_failures_gb = raw_after_max_failures_gb * (k / (k + remaining_m))
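
    # For example (hypothetical): with k=4, m=2 and max_failure_nodes=2, both
    # coding chunks are gone (remaining_m = 0) and the surviving raw capacity
    # is fully usable, since k / (k + 0) = 1.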

    # Calculate usable capacity after failure of the largest node
    raw_after_failure_gb = raw_total_gb - largest_node_capacity
    if replication_type == 'replication':
        usable_after_failure_gb = raw_after_failure_gb / min_size
    else:  # Erasure Coding
        usable_after_failure_gb = raw_after_failure_gb * (k / (k + m))

    # Check whether enough storage space remains after a node failure
    node_failure_tolerance = True

    # Check the minimum node requirements
    if replication_type == 'replication':
        if len(nodes) < min_size:
            node_failure_tolerance = False
        elif usable_after_failure_gb < max_usage_gb:
            node_failure_tolerance = False
    else:  # Erasure Coding
        if len(nodes) <= k:
            node_failure_tolerance = False
        elif usable_after_failure_gb < max_usage_gb:
            node_failure_tolerance = False

    # Check tolerance for multiple simultaneous failures
    multi_failure_tolerance = False
    if max_failure_nodes > 0:
        multi_failure_tolerance = (
            usable_after_max_failures_gb >= max_usage_gb and
            len(nodes) > max_failure_nodes
        )

    # Maximum safe usage considering a possible node failure
    safe_usage_percent = 0
    safe_usage_gb = 0
    safe_usage_tb = 0
    node_failure_info = ""

    if node_failure_tolerance:
        safe_usage_percent = max_recommended_usage_percent
        safe_usage_gb = max_usage_gb
        safe_usage_tb = max_usage_tb

        if multi_failure_tolerance and max_failure_nodes > 1:
            node_failure_info = f"The cluster can tolerate the failure of up to {max_failure_nodes} nodes (min_size={min_size})."
        else:
            node_failure_info = f"The cluster can tolerate the failure of its largest node (min_size={min_size})."
    else:
        # Guard against clusters whose nodes carry no capacity at all
        if usable_capacity_gb > 0:
            safe_usage_percent = round((usable_after_failure_gb / usable_capacity_gb) * 100 * 0.8)
        safe_usage_gb = usable_after_failure_gb * 0.8
        safe_usage_tb = safe_usage_gb / 1024

        # m >= 1 is guaranteed on the EC path by the earlier validation, so
        # the minimum viable node count is min_size for replication and
        # k + m - 1 for erasure coding
        min_required_nodes = min_size if replication_type == 'replication' else k + m - 1
        if len(nodes) <= min_required_nodes:
            node_failure_info = (f"CRITICAL: Too few nodes ({len(nodes)}) for the configured min_size={min_size}. "
                                 f"At least {min_required_nodes + 1} nodes needed.")
        else:
            # Unit for display
            unit_display = "TB" if storage_unit == "TB" else "GB"
            node_size_display = round(largest_node_capacity / 1024, 2) if storage_unit == "TB" else round(largest_node_capacity, 2)

            node_failure_info = (f"WARNING: The cluster does not have enough free capacity to tolerate the failure of its largest node "
                                 f"({node_size_display} {unit_display}). "
                                 f"Maximum safe usage: {safe_usage_percent}%")

    # Tolerance for a single OSD failure
    osd_failure_tolerance = False
    osd_failure_info = "No OSDs in the cluster"

    if nodes and any(int(node.get('osd_count', 0)) > 0 for node in nodes):
        osd_failure_tolerance = usable_after_osd_failure >= max_usage_gb

        # Unit for display
        unit_display = "TB" if storage_unit == "TB" else "GB"
        osd_size_display = round(largest_osd_size / 1024, 2) if storage_unit == "TB" else round(largest_osd_size, 2)

        if osd_failure_tolerance:
            osd_failure_info = f"The cluster can tolerate the failure of its largest OSD (min_size={min_size})."
        else:
            osd_failure_info = (f"WARNING: The cluster does not have enough free capacity to tolerate "
                                f"the failure of its largest OSD ({osd_size_display} {unit_display}).")

    # Return values converted to the requested display unit
    def _display(value_gb):
        # Convert a GB value to the configured storage unit for display
        return round(value_gb / 1024, 2) if storage_unit == 'TB' else round(value_gb, 2)

    result = {
        'max_usage_percent': round(osd_usage_percent, 2),
        'max_usage_gb': round(max_usage_gb, 2),
        'max_usage_tb': round(max_usage_tb, 2),
        'raw_total': _display(raw_total_gb),
        'node_failure_tolerance': node_failure_tolerance,
        'node_failure_info': node_failure_info,
        'multi_failure_tolerance': multi_failure_tolerance,
        'max_failure_nodes': max_failure_nodes,
        'osd_failure_tolerance': osd_failure_tolerance,
        'osd_failure_info': osd_failure_info,
        'largest_node_gb': _display(largest_node_capacity),
        'raw_after_failure_gb': _display(raw_after_failure_gb),
        'usable_after_failure_gb': _display(usable_after_failure_gb),
        'raw_after_max_failures_gb': _display(raw_after_max_failures_gb),
        'usable_after_max_failures_gb': _display(usable_after_max_failures_gb),
        'min_size': min_size,
        'osds_per_server': osds_per_server,
        'storage_unit': storage_unit
    }

    return result
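

# Illustrative usage sketch: the node layout and pool settings below are
# made-up example values, not taken from any real deployment.
if __name__ == '__main__':
    demo_nodes = [
        {'osd_count': 4, 'osd_size_gb': 1000},
        {'osd_count': 4, 'osd_size_gb': 1000},
        {'osd_count': 2, 'osd_size_gb': 2000},
    ]

    # 3-way replication with the default min_size=2
    replicated = calculate_ceph_capacity('replication', replicas=3, nodes=demo_nodes)
    print(f"Replication: {replicated['max_usage_gb']} GB usable "
          f"({replicated['max_usage_percent']}%) - {replicated['node_failure_info']}")

    # Erasure coding with k=4 data chunks and m=2 coding chunks
    ec = calculate_ceph_capacity('erasure_coding', k=4, m=2, nodes=demo_nodes)
    print(f"EC 4+2: {ec['max_usage_gb']} GB usable - {ec['osd_failure_info']}")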