Skip to content

Serverless vs. Provisioned

Choosing the Right Compute Model


Overview

Choosing between serverless and provisioned compute involves trading off cost, performance, and operational complexity. Serverless offers zero idle cost but higher per-query cost, while provisioned offers predictable performance at fixed cost.


Comparison Overview

Feature Comparison


Detailed Comparison

Cost Analysis

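| Aspect | Serverless | Provisioned |
| --- | --- | --- |
| Pricing | Pay-per-query | Fixed hourly rate |
| Idle cost | Zero | Full cost |
| Compute cost | Higher per-unit | Lower per-unit |
| Storage cost | Same | Same |
| Network cost | Same | Same |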
Network costSameSame

Performance Analysis

| Aspect | Serverless | Provisioned |
| --- | --- | --- |
| Startup time | 10-60 seconds | Zero (always running) |
| Maximum throughput | Limited | Higher |
| Consistency | Variable | Consistent |
| SLA | Best effort | Guaranteed |

Operational Complexity

| Aspect | Serverless | Provisioned |
| --- | --- | --- |
| Setup | Minimal | Significant |
| Maintenance | None | Continuous |
| Scaling | Automatic | Manual/auto |
| Monitoring | Provided | Custom |

Break-Even Analysis

When to Use Serverless

# Break-even analysis: Serverless vs. Provisioned
def calculate_break_even(
    provisioned_hourly_cost: float,
    serverless_cost_per_tb: float,
    avg_queries_per_month: int,
    avg_data_per_query_tb: float
) -> dict:
    """Compare one month of provisioned spend against serverless spend.

    The provisioned side is charged for every hour of a 30-day month; the
    serverless side is charged per TB scanned across all queries.

    Args:
        provisioned_hourly_cost: Hourly price of the provisioned cluster.
        serverless_cost_per_tb: Serverless price per TB scanned.
        avg_queries_per_month: Number of queries run per month.
        avg_data_per_query_tb: Average TB scanned by a single query.

    Returns:
        A dict with the keys ``monthly_provisioned_cost``,
        ``monthly_serverless_cost``, ``is_serverless_cheaper``, ``savings``
        (0 unless serverless is strictly cheaper) and ``recommendation``
        (``'serverless'`` or ``'provisioned'``; a tie goes to provisioned).
    """
    # Always-on cluster: 24 hours a day for 30 days.
    provisioned_total = provisioned_hourly_cost * 24 * 30

    # Pay-per-scan: total TB scanned in the month times the per-TB price.
    scanned_tb = avg_queries_per_month * avg_data_per_query_tb
    serverless_total = scanned_tb * serverless_cost_per_tb

    serverless_wins = serverless_total < provisioned_total
    if serverless_wins:
        saved = provisioned_total - serverless_total
        pick = 'serverless'
    else:
        saved = 0
        pick = 'provisioned'

    return {
        'monthly_provisioned_cost': provisioned_total,
        'monthly_serverless_cost': serverless_total,
        'is_serverless_cheaper': serverless_wins,
        'savings': saved,
        'recommendation': pick,
    }
# Example: serverless (BigQuery-style on-demand) vs. a provisioned warehouse.
# All prices are illustrative inputs in USD.
analysis = calculate_break_even(
provisioned_hourly_cost=5.00, # whole-cluster rate, e.g. 20 dc2.large nodes at the $0.25/hour used in redshift_cost_analysis
serverless_cost_per_tb=5.00, # on-demand price per TB scanned
avg_queries_per_month=1000,
avg_data_per_query_tb=0.1 # 100GB per query
)
print(analysis)
# Prints the following dict (reformatted here for readability):
# {
# 'monthly_provisioned_cost': 3600.00,
# 'monthly_serverless_cost': 500.00,
# 'is_serverless_cheaper': True,
# 'savings': 3100.00,
# 'recommendation': 'serverless'
# }

Break-Even Calculator

# General break-even calculator
class ComputeModelComparator:
    """Compare a serverless and a provisioned compute offering.

    ``provisioned_specs`` must contain ``'hourly_cost'``; ``serverless_specs``
    must contain ``'cost_per_tb'`` and ``'avg_tb_per_hour'``. Other keys are
    stored but not read by this class.
    """

    def __init__(
        self,
        provisioned_specs: dict,
        serverless_specs: dict
    ):
        self.provisioned = provisioned_specs
        self.serverless = serverless_specs

    def calculate_break_even_hours(self) -> float:
        """Return the monthly serverless usage hours that cost as much as
        running the provisioned cluster for a full 30-day month.

        Returns:
            The break-even point in hours per month, or ``float('inf')`` when
            the serverless hourly cost is zero (serverless usage then never
            reaches the provisioned bill).

        Raises:
            KeyError: If a required spec key is missing.
        """
        # Provisioned capacity is paid for around the clock: 24 h * 30 days.
        monthly_provisioned = self.provisioned['hourly_cost'] * 24 * 30

        # Average serverless spend for one hour of active usage.
        hourly_serverless_cost = (
            self.serverless['cost_per_tb'] *
            self.serverless['avg_tb_per_hour']
        )

        # Guard the division: a free (or idle) serverless tier has no finite
        # break-even point, so report infinity instead of crashing.
        if hourly_serverless_cost == 0:
            return float('inf')

        return monthly_provisioned / hourly_serverless_cost

    def recommend(self, avg_monthly_hours: float) -> str:
        """Recommend a compute model for the given monthly usage.

        Usage more than 20% below the break-even point yields
        ``'serverless'``, more than 20% above yields ``'provisioned'``, and
        anything inside that band yields ``'hybrid'``.
        """
        break_even = self.calculate_break_even_hours()
        if avg_monthly_hours < break_even * 0.8:
            return 'serverless'
        if avg_monthly_hours > break_even * 1.2:
            return 'provisioned'
        return 'hybrid'  # Too close to call: use both
# Example usage
comparator = ComputeModelComparator(
provisioned_specs={
'hourly_cost': 5.00,
'instance_type': 'dc2.large'
},
serverless_specs={
'cost_per_tb': 5.00,
'avg_tb_per_hour': 1.0
}
)
# Break-even: (5.00 * 24 * 30) / (5.00 * 1.0) = 720.0 hours/month.
break_even_hours = comparator.calculate_break_even_hours()
print(f"Break-even: {break_even_hours:.1f} hours/month")
# 100 hours is below 80% of the break-even point (576 hours), so this yields 'serverless'.
recommendation = comparator.recommend(avg_monthly_hours=100)
print(f"Recommendation: {recommendation}")

Platform Comparison

BigQuery (Serverless)

# BigQuery serverless analysis
def bigquery_cost_analysis(
    monthly_queries: int,
    avg_data_per_query_tb: float
) -> dict:
    """Estimate monthly BigQuery spend and pick on-demand or flat-rate.

    Args:
        monthly_queries: Number of queries run per month.
        avg_data_per_query_tb: Average TB scanned by a single query.

    Returns:
        A dict that always has the same keys, whichever model is chosen:
        ``model`` (``'on-demand'`` or ``'flat-rate'``), ``monthly_cost``,
        ``queries``, ``avg_data_per_query_tb`` and ``savings`` (what
        flat-rate saves over on-demand; 0 when on-demand is kept).
    """
    # On-demand price per TB scanned, in USD (illustrative figure).
    cost_per_tb = 5.00
    # Flat-rate commitment in USD/month. NOTE(review): flat-rate capacity is
    # sold in slots, not TB (this figure presumably matches the former
    # 500-slot tier) — verify against current pricing.
    flat_rate_cost = 10000

    monthly_cost = monthly_queries * avg_data_per_query_tb * cost_per_tb

    result = {
        'model': 'on-demand',
        'monthly_cost': monthly_cost,
        'queries': monthly_queries,
        'avg_data_per_query_tb': avg_data_per_query_tb,
        'savings': 0,
    }

    # Flat-rate is only considered for workloads above 1000 queries/month,
    # and only chosen when it is strictly cheaper than on-demand.
    if monthly_queries > 1000 and monthly_cost > flat_rate_cost:
        result.update(
            model='flat-rate',
            monthly_cost=flat_rate_cost,
            savings=monthly_cost - flat_rate_cost,
        )
    return result

Redshift (Provisioned)

# Redshift provisioned analysis
def redshift_cost_analysis(
    node_type: str,
    num_nodes: int,
    monthly_hours: float = 730,  # ~8760 / 12: average hours in a month
    tb_per_hour: float = 1.0
) -> dict:
    """Estimate the monthly cost of a provisioned Redshift cluster.

    Args:
        node_type: Node type name, e.g. ``'dc2.large'``. Types missing from
            the built-in price table are priced at $1.00/hour per node.
        num_nodes: Number of nodes in the cluster.
        monthly_hours: Hours the cluster runs per month.
        tb_per_hour: Assumed processing throughput in TB/hour, used only to
            derive ``cost_per_tb``.

    Returns:
        A dict with ``node_type``, ``num_nodes``, ``hourly_cost``,
        ``monthly_cost`` and ``cost_per_tb``. ``cost_per_tb`` is 0.0 when no
        data is processed (zero hours or zero throughput).
    """
    # USD per node-hour. NOTE(review): labelled us-east-1 rates by the
    # original author; treat as illustrative and verify against the current
    # AWS price list.
    pricing = {
        'dc2.large': 0.25,
        'dc2.8xlarge': 2.00,
        'ra3.xlplus': 1.15,
        'ra3.4xlarge': 4.60,
    }
    hourly_cost = pricing.get(node_type, 1.00) * num_nodes
    monthly_cost = hourly_cost * monthly_hours

    # Guard the division: with nothing processed there is no per-TB cost.
    processed_tb = monthly_hours * tb_per_hour
    cost_per_tb = monthly_cost / processed_tb if processed_tb else 0.0

    return {
        'node_type': node_type,
        'num_nodes': num_nodes,
        'hourly_cost': hourly_cost,
        'monthly_cost': monthly_cost,
        'cost_per_tb': cost_per_tb,
    }

Hybrid Approach

Auto-Tiering

Implementation

# Hybrid routing strategy
class HybridComputeRouter:
    """Route each query to the serverless or the provisioned backend.

    Both clients only need an ``execute(query)`` method. When a client is not
    supplied, the default ``BigQueryClient()`` / ``RedshiftClient()`` is
    constructed, exactly as before.
    """

    def __init__(
        self,
        serverless_client=None,
        provisioned_client=None,
        serverless_cost_per_tb: float = 5.00,
        provisioned_hourly_cost: float = 5.00,
    ):
        """Store the two backends and the prices used for mid-range routing.

        Args:
            serverless_client: Backend for serverless execution; defaults to
                a new ``BigQueryClient()``.
            provisioned_client: Backend for provisioned execution; defaults
                to a new ``RedshiftClient()``.
            serverless_cost_per_tb: Serverless price per TB scanned.
            provisioned_hourly_cost: Fixed hourly cost of the provisioned
                cluster, used as the comparison point for mid-range queries.
        """
        if serverless_client is None:
            serverless_client = BigQueryClient()
        if provisioned_client is None:
            provisioned_client = RedshiftClient()
        self.serverless_client = serverless_client
        self.provisioned_client = provisioned_client
        self.serverless_cost_per_tb = serverless_cost_per_tb
        self.provisioned_hourly_cost = provisioned_hourly_cost

    def route_query(
        self,
        query: str,
        estimated_data_tb: float,
        complexity: str = 'simple'
    ) -> str:
        """Execute ``query`` on the backend chosen by size and complexity.

        Args:
            query: Query text handed unchanged to the chosen client.
            estimated_data_tb: Estimated TB the query will scan.
            complexity: ``'simple'``, ``'complex'``, or any other label for
                mid-range queries.

        Returns:
            Whatever the chosen client's ``execute`` returns.
            NOTE(review): annotated as ``str`` by the original author —
            confirm against the client implementations.
        """
        # Small, simple queries -> serverless.
        if complexity == 'simple' and estimated_data_tb < 1.0:
            return self.serverless_client.execute(query)

        # Complex queries or large scans -> provisioned.
        if complexity == 'complex' or estimated_data_tb >= 1.0:
            return self.provisioned_client.execute(query)

        # Mid-range: compare the query's serverless cost with one hour of
        # provisioned capacity. With the default prices (5.00 each) any scan
        # under 1 TB costs less than 5.00, so serverless always wins here;
        # other prices make the provisioned side reachable.
        serverless_cost = estimated_data_tb * self.serverless_cost_per_tb
        if serverless_cost < self.provisioned_hourly_cost:
            return self.serverless_client.execute(query)
        return self.provisioned_client.execute(query)

Decision Framework

Decision Tree


Serverless vs. Provisioned

Best Practices

# Decision guidelines
def choose_compute_model(
    monthly_hours: float,
    avg_query_size_tb: float,
    requires_consistency: bool,
    max_latency_ms: int
) -> str:
    """Pick ``'serverless'`` or ``'provisioned'`` from coarse workload traits.

    Rules, applied in this order:
      1. Under 50 usage hours/month -> serverless.
      2. Over 300 usage hours/month -> provisioned.
      3. Latency budget under 1000 ms -> provisioned.
      4. Consistent performance required -> provisioned.
      5. Otherwise -> serverless.

    The usage thresholds are evaluated first, so a workload under 50
    hours/month is sent to serverless regardless of its latency or
    consistency requirements. ``avg_query_size_tb`` is accepted but not used
    by the current rules.
    """
    # Infrequent usage: idle cost dominates, so serverless wins outright.
    if monthly_hours < 50:
        return 'serverless'

    # Any of these pushes the workload to provisioned: heavy usage, a
    # sub-second latency budget, or a need for consistent performance.
    needs_provisioned = (
        monthly_hours > 300
        or max_latency_ms < 1000
        or requires_consistency
    )
    return 'provisioned' if needs_provisioned else 'serverless'

Key Takeaways

  1. Serverless: Zero idle cost, higher per-unit cost
  2. Provisioned: Fixed cost, lower per-unit cost
  3. Break-even: Calculate based on usage patterns
  4. Infrequent: Serverless (< 50 hours/month)
  5. Frequent: Provisioned (> 300 hours/month)
  6. Hybrid: Use both for optimal cost
  7. Latency: Provisioned for low-latency requirements
  8. Use When: Every workload — choose the model (or mix of both) that matches its usage pattern

Back to Module 7