Serverless vs. Provisioned

Choosing the Right Compute Model

Overview

Choosing between serverless and provisioned compute involves trading off cost, performance, and operational complexity. Serverless offers zero idle cost but higher per-query cost, while provisioned offers predictable performance at fixed cost.

Comparison Overview

Feature Comparison

Detailed Comparison

Cost Analysis

Aspect	Serverless	Provisioned
Pricing	Pay-per-query	Fixed hourly rate
Idle cost	Zero	Full cost
Compute cost	Higher per-unit	Lower per-unit
Storage cost	Same	Same
Network cost	Same	Same

Performance Analysis

Aspect	Serverless	Provisioned
Startup time	10-60 seconds	Zero (always running)
Maximum throughput	Limited	Higher
Consistency	Variable	Consistent
SLA	Best effort	Guaranteed

Operational Complexity

Aspect	Serverless	Provisioned
Setup	Minimal	Significant
Maintenance	None	Continuous
Scaling	Automatic	Manual/auto
Monitoring	Provided	Custom

Break-Even Analysis

When to Use Serverless

# Break-even analysis: Serverless vs. Provisioned

def calculate_break_even(
    provisioned_hourly_cost: float,
    serverless_cost_per_tb: float,
    avg_queries_per_month: int,
    avg_data_per_query_tb: float
) -> dict:
    """Compare one month of provisioned spend with one month of serverless spend.

    The provisioned side is billed for every hour of a 30-day month; the
    serverless side is billed only for the data scanned by its queries.

    Args:
        provisioned_hourly_cost: Hourly price of the provisioned cluster.
        serverless_cost_per_tb: Serverless price per TB scanned.
        avg_queries_per_month: Number of queries run in a month.
        avg_data_per_query_tb: Average TB scanned by a single query.

    Returns:
        A dict with both monthly costs, a flag telling whether serverless is
        strictly cheaper, the savings from choosing serverless (0 when it is
        not cheaper), and the recommended model name.
    """
    # Always-on cluster: 24 hours a day for 30 days.
    provisioned_total = provisioned_hourly_cost * 24 * 30

    # Pay-per-scan: queries x TB per query x price per TB.
    serverless_total = (
        avg_queries_per_month * avg_data_per_query_tb * serverless_cost_per_tb
    )

    # A tie goes to provisioned (strict comparison).
    if serverless_total < provisioned_total:
        serverless_wins = True
        saved = provisioned_total - serverless_total
        winner = 'serverless'
    else:
        serverless_wins = False
        saved = 0
        winner = 'provisioned'

    summary = {}
    summary['monthly_provisioned_cost'] = provisioned_total
    summary['monthly_serverless_cost'] = serverless_total
    summary['is_serverless_cheaper'] = serverless_wins
    summary['savings'] = saved
    summary['recommendation'] = winner
    return summary

# Example: BigQuery vs. Provisioned Warehouse
# Scenario: 1,000 queries a month, each scanning 0.1 TB (100GB).
analysis = calculate_break_even(
    5.00,   # provisioned_hourly_cost: provisioned warehouse, USD per hour
    5.00,   # serverless_cost_per_tb: BigQuery on-demand, USD per TB scanned
    1000,   # avg_queries_per_month
    0.1     # avg_data_per_query_tb
)

print(analysis)
# Expected values:
#     monthly_provisioned_cost -> 3600.00   (5.00 * 24 * 30)
#     monthly_serverless_cost  -> 500.00    (1000 * 0.1 * 5.00)
#     is_serverless_cheaper    -> True
#     savings                  -> 3100.00
#     recommendation           -> 'serverless'

Break-Even Calculator

# General break-even calculator

class ComputeModelComparator:
    """Compare serverless vs. provisioned compute by monthly usage hours.

    ``provisioned_specs`` must contain ``'hourly_cost'``; ``serverless_specs``
    must contain ``'cost_per_tb'`` and ``'avg_tb_per_hour'``. Other keys are
    stored but not read by this class.
    """

    def __init__(
        self,
        provisioned_specs: dict,
        serverless_specs: dict
    ):
        self.provisioned = provisioned_specs
        self.serverless = serverless_specs

    def calculate_break_even_hours(self) -> float:
        """Return the serverless usage hours per month that cost as much as
        running the provisioned cluster for a full 30-day month.

        Returns:
            Break-even hours. ``float('inf')`` when serverless usage costs
            nothing per hour (there is no usage level at which provisioned
            becomes cheaper). The value may exceed the 720 hours in the
            month, which means serverless is cheaper even at full utilisation.

        Raises:
            KeyError: If a required spec key is missing.
        """
        # Monthly provisioned cost (24 * 30 hours, billed whether used or not)
        monthly_provisioned = (
            self.provisioned['hourly_cost'] * 24 * 30
        )

        # Serverless cost per hour of active use (average)
        hourly_serverless_cost = (
            self.serverless['cost_per_tb'] *
            self.serverless['avg_tb_per_hour']
        )

        # Free serverless usage never reaches the provisioned bill; avoid
        # dividing by zero and report "never breaks even" instead.
        if hourly_serverless_cost == 0:
            return float('inf')

        return monthly_provisioned / hourly_serverless_cost

    def recommend(self, avg_monthly_hours: float) -> str:
        """Recommend a compute model for the given monthly usage.

        Usage within 20% of the break-even point on either side is treated as
        too close to call and yields ``'hybrid'``.

        Returns:
            ``'serverless'``, ``'provisioned'`` or ``'hybrid'``.
        """
        break_even = self.calculate_break_even_hours()

        if avg_monthly_hours < break_even * 0.8:
            return 'serverless'
        elif avg_monthly_hours > break_even * 1.2:
            return 'provisioned'
        else:
            return 'hybrid'  # Use both

# Example usage: a $5.00/hour cluster against $5.00/TB serverless at 1 TB/hour
provisioned_specs = {
    'hourly_cost': 5.00,
    'instance_type': 'dc2.large'
}
serverless_specs = {
    'cost_per_tb': 5.00,
    'avg_tb_per_hour': 1.0
}
comparator = ComputeModelComparator(provisioned_specs, serverless_specs)

# Hours of serverless use that would match the full provisioned bill
break_even_hours = comparator.calculate_break_even_hours()
print(f"Break-even: {break_even_hours:.1f} hours/month")

# Which model fits 100 hours of use per month?
recommendation = comparator.recommend(avg_monthly_hours=100)
print(f"Recommendation: {recommendation}")

Platform Comparison

BigQuery (Serverless)

# BigQuery serverless analysis

def bigquery_cost_analysis(
    monthly_queries: int,
    avg_data_per_query_tb: float,
    cost_per_tb: float = 5.00,
    flat_rate_cost: float = 10000
) -> dict:
    """Analyze BigQuery serverless costs and pick the cheaper pricing model.

    Args:
        monthly_queries: Number of queries run per month.
        avg_data_per_query_tb: Average TB scanned per query.
        cost_per_tb: On-demand price per TB scanned, in USD.
        flat_rate_cost: Monthly flat-rate commitment in USD. The default is
            meant as the legacy price for 500 slots; verify against current
            BigQuery pricing.

    Returns:
        A dict with the same keys on every path:
        ``model`` (``'on-demand'`` or ``'flat-rate'``), ``monthly_cost`` for
        the chosen model, ``savings`` versus on-demand (0 when on-demand is
        chosen), ``queries`` and ``avg_data_per_query_tb``.
    """
    # What the workload would cost on pay-per-scan pricing
    on_demand_cost = monthly_queries * avg_data_per_query_tb * cost_per_tb

    # Flat-rate wins whenever the on-demand bill exceeds the commitment.
    # The decision depends on cost only: a handful of very large queries can
    # exceed the flat rate just as easily as thousands of small ones.
    if on_demand_cost > flat_rate_cost:
        model = 'flat-rate'
        monthly_cost = flat_rate_cost
        savings = on_demand_cost - flat_rate_cost
    else:
        model = 'on-demand'
        monthly_cost = on_demand_cost
        savings = 0

    return {
        'model': model,
        'monthly_cost': monthly_cost,
        'savings': savings,
        'queries': monthly_queries,
        'avg_data_per_query_tb': avg_data_per_query_tb
    }

Redshift (Provisioned)

# Redshift provisioned analysis

def redshift_cost_analysis(
    node_type: str,
    num_nodes: int,
    monthly_hours: float = 730,  # average month: 8760 hours/year / 12
    tb_per_hour: float = 1.0
) -> dict:
    """Analyze Redshift provisioned costs.

    Args:
        node_type: Node type name looked up in the built-in price table.
            Unknown types fall back to $1.00/hour per node.
        num_nodes: Number of nodes in the cluster.
        monthly_hours: Hours the cluster runs per month.
        tb_per_hour: Assumed data processed per running hour, used only to
            derive ``cost_per_tb``.

    Returns:
        A dict with ``node_type``, ``num_nodes``, ``hourly_cost`` (whole
        cluster), ``monthly_cost`` and ``cost_per_tb``. ``cost_per_tb`` is
        0.0 when no data is processed (zero hours or zero throughput).
    """
    # Per-node hourly rates in USD (us-east-1).
    # NOTE(review): illustrative figures; verify against current AWS pricing.
    pricing = {
        'dc2.large': 0.25,     # $0.25/hour
        'dc2.8xlarge': 2.00,   # $2.00/hour
        'ra3.xlplus': 1.15,    # $1.15/hour
        'ra3.4xlarge': 4.60    # $4.60/hour
    }

    hourly_cost = pricing.get(node_type, 1.00) * num_nodes
    monthly_cost = hourly_cost * monthly_hours

    # Guard the division: a cluster that processed nothing has no
    # meaningful per-TB price, so report 0.0 instead of raising.
    processed_tb = monthly_hours * tb_per_hour
    cost_per_tb = monthly_cost / processed_tb if processed_tb else 0.0

    return {
        'node_type': node_type,
        'num_nodes': num_nodes,
        'hourly_cost': hourly_cost,
        'monthly_cost': monthly_cost,
        'cost_per_tb': cost_per_tb
    }

Hybrid Approach

Auto-Tiering

Implementation

# Hybrid routing strategy

class HybridComputeRouter:
    """Route queries to the serverless or the provisioned backend.

    Clients, price points and the size threshold can be injected; every
    argument defaults to the value that was previously hard-coded, so
    ``HybridComputeRouter()`` behaves as before.
    """

    def __init__(
        self,
        serverless_client=None,
        provisioned_client=None,
        serverless_cost_per_tb: float = 5.00,
        provisioned_hourly_cost: float = 5.00,
        large_query_tb: float = 1.0
    ):
        """Set up the router.

        Args:
            serverless_client: Object with an ``execute(query)`` method.
                Defaults to a new ``BigQueryClient()``.
            provisioned_client: Object with an ``execute(query)`` method.
                Defaults to a new ``RedshiftClient()``.
            serverless_cost_per_tb: Serverless price per TB scanned.
            provisioned_hourly_cost: Fixed hourly cost of the provisioned
                cluster, used as the comparison point for mid-range queries.
            large_query_tb: Scan size (TB) at or above which a query always
                goes to the provisioned backend.
        """
        # Build the default clients only when none is supplied, so callers
        # (and tests) can pass their own without touching real services.
        self.serverless_client = (
            serverless_client if serverless_client is not None
            else BigQueryClient()
        )
        self.provisioned_client = (
            provisioned_client if provisioned_client is not None
            else RedshiftClient()
        )
        self.serverless_cost_per_tb = serverless_cost_per_tb
        self.provisioned_hourly_cost = provisioned_hourly_cost
        self.large_query_tb = large_query_tb

    def route_query(
        self,
        query: str,
        estimated_data_tb: float,
        complexity: str = 'simple'
    ) -> str:
        """Execute ``query`` on the backend chosen by size, complexity and cost.

        Args:
            query: Query text handed unchanged to the chosen client.
            estimated_data_tb: Estimated TB the query will scan.
            complexity: ``'simple'``, ``'complex'`` or any other label; other
                labels are treated as mid-range.

        Returns:
            Whatever the chosen client's ``execute`` returns.
            NOTE(review): annotated as ``str``; confirm against the clients.
        """
        # Small, simple queries -> Serverless
        if complexity == 'simple' and estimated_data_tb < self.large_query_tb:
            return self.serverless_client.execute(query)

        # Complex or large queries -> Provisioned
        if complexity == 'complex' or estimated_data_tb >= self.large_query_tb:
            return self.provisioned_client.execute(query)

        # Mid-range (small scan, complexity neither simple nor complex):
        # compare the serverless scan cost with the provisioned hourly cost.
        # With the default rates and threshold this always picks serverless;
        # the provisioned branch is reachable once the rates differ.
        serverless_cost = estimated_data_tb * self.serverless_cost_per_tb
        if serverless_cost < self.provisioned_hourly_cost:
            return self.serverless_client.execute(query)
        return self.provisioned_client.execute(query)

Decision Framework

Decision Tree

Serverless vs. Provisioned

Best Practices

# Decision guidelines

def choose_compute_model(
    monthly_hours: float,
    avg_query_size_tb: float,
    requires_consistency: bool,
    max_latency_ms: int
) -> str:
    """Choose a compute model from hard requirements, then usage volume.

    Hard requirements are checked first: a sub-second latency target or a
    need for consistent performance selects provisioned regardless of how
    little the system is used, because a cost heuristic must not override a
    requirement that serverless cannot meet.

    Args:
        monthly_hours: Expected hours of active use per month.
        avg_query_size_tb: Average TB scanned per query. Currently not used
            in the decision; kept for interface compatibility.
        requires_consistency: True when consistent performance is required.
        max_latency_ms: Maximum acceptable query latency in milliseconds.

    Returns:
        ``'serverless'`` or ``'provisioned'``.
    """
    # Hard requirement: latency under 1 second
    if max_latency_ms < 1000:
        return 'provisioned'  # Serverless has startup overhead

    # Hard requirement: consistent performance
    if requires_consistency:
        return 'provisioned'

    # Cost heuristics: serverless for infrequent usage (< 50 hours/month)
    if monthly_hours < 50:
        return 'serverless'

    # Provisioned for heavy usage (> 300 hours/month)
    if monthly_hours > 300:
        return 'provisioned'

    # Default to serverless for mid-range usage
    return 'serverless'

Key Takeaways

Serverless: Zero idle cost, higher per-unit cost
Provisioned: Fixed cost, lower per-unit cost
Break-even: Calculate based on usage patterns
Infrequent: Serverless (< 50 hours/month)
Frequent: Provisioned (> 300 hours/month)
Hybrid: Use both for optimal cost
Latency: Provisioned for low-latency requirements
Use When: Any workload can use either model; choose the one that fits your usage pattern

Back to Module 7