Skip to content

Tagging Strategies

Cost Attribution and Governance


Overview

Tagging enables cost attribution, governance, and optimization by associating metadata with cloud resources. It’s essential for chargeback, budget control, and cost optimization.


Tagging Architecture

Tag Taxonomy

Tag Categories:

  • Business: Cost center, department, project
  • Technical: Service, resource type, owner
  • Operational: Environment, purpose, compliance

Tag Implementation

Tag Definition

# Tag taxonomy definition
# Each entry declares one tag key: a human-readable description, whether the
# tag is mandatory on every resource, and the closed list of allowed values.
tags:
  # Business tags
  - name: CostCenter
    description: Department or business unit
    required: true
    values:
      - engineering
      - marketing
      - sales
      - finance
      - data-platform
  - name: Environment
    description: Deployment environment
    required: true
    values:
      - production
      - staging
      - development
      - testing
  - name: Project
    description: Specific project or initiative
    required: true
    values:
      - data-warehouse
      - ml-platform
      - analytics-platform
  # Technical tags
  - name: Service
    description: Service or application
    required: true
    values:
      - data-lake
      - warehouse
      - streaming
      - batch-processing
  - name: ResourceType
    description: Type of resource
    required: false
    values:
      - compute
      - storage
      - database
      - network
  - name: Owner
    description: Team or individual owner
    required: true
    values:
      - data-engineering
      - data-science
      - devops
  # Operational tags
  - name: Purpose
    description: Purpose of resource
    required: false
    values:
      - etl
      - analytics
      - reporting
      - development
  - name: Compliance
    description: Compliance requirements
    required: false
    values:
      - pii
      - phi
      - pci
      - gdpr
  - name: CreatedBy
    description: Who created the resource
    required: true
    values:
      - terraform
      - manual
      - ci-cd

Tag Implementation

# Tag resources with Terraform
# Data-lake bucket carrying the full tag set from the taxonomy.
resource "aws_s3_bucket" "data_lake" {
  bucket = "my-company-data-lake"

  tags = {
    Name         = "data-lake"
    Environment  = "production"
    CostCenter   = "data-platform"
    Project      = "data-warehouse"
    Service      = "data-lake"
    ResourceType = "storage"
    Owner        = "data-engineering"
    Purpose      = "analytics"
    # CreatedBy is marked required in the tag taxonomy; ManagedBy is kept
    # alongside it as an extra, non-taxonomy tag.
    CreatedBy    = "terraform"
    ManagedBy    = "terraform"
  }
}
# Add tags to existing resources
# aws_ec2_tag manages exactly one tag (key/value) per resource instance and
# has no `tags` map argument, so for_each creates one instance per tag.
resource "aws_ec2_tag" "tag_existing" {
  for_each = {
    Environment = "production"
    CostCenter  = "data-platform"
    Owner       = "data-engineering"
  }

  resource_id = "i-1234567890abcdef0"
  key         = each.key
  value       = each.value
}

Tag Enforcement

# Tag compliance check (AWS Resource Groups Tagging API)
import boto3
def enforce_tags(resource_id: str, required_tags: dict):
    """Verify that a resource carries every required tag with the expected value.

    Args:
        resource_id: ARN of the resource to check; it is passed to the
            Resource Groups Tagging API as the only entry of ``ResourceARNList``.
        required_tags: Mapping of tag key to the exact value it must have.

    Returns:
        True when every required tag is present with the expected value.

    Raises:
        Exception: If any required tag is absent or has a different value.
            The message lists the offending tag keys.
    """
    client = boto3.client('resourcegroupstaggingapi')

    # The Resource Groups Tagging API client has no ``get_tags`` operation;
    # tags for a specific ARN are fetched with ``get_resources``.
    response = client.get_resources(ResourceARNList=[resource_id])

    # Tags are returned as a list of {'Key': ..., 'Value': ...} dicts per
    # resource, so flatten them into a plain dict for lookups. If the
    # resource is not returned at all the dict stays empty and every
    # required tag is reported below.
    current_tags = {
        tag['Key']: tag['Value']
        for mapping in response.get('ResourceTagMappingList', [])
        for tag in mapping.get('Tags', [])
    }

    # A tag counts as missing when the key is absent OR the value differs.
    missing_tags = [
        key
        for key, value in required_tags.items()
        if key not in current_tags or current_tags[key] != value
    ]
    if missing_tags:
        raise Exception(f"Missing required tags: {missing_tags}")
    return True
# Example: check that a bucket carries the expected production tag set
required_tags = dict(
    Environment='production',
    CostCenter='data-platform',
    Owner='data-engineering',
)
enforce_tags(resource_id='arn:aws:s3:::my-bucket', required_tags=required_tags)

Tag-Based Cost Analysis

Cost Breakdown

# Cost analysis by tags
import boto3
import pandas as pd
from datetime import datetime, timedelta
def get_cost_by_tag(tag_key: str, days: int = 30) -> pd.DataFrame:
    """Return daily unblended cost grouped by the values of one tag.

    Args:
        tag_key: Tag key to group by (for example ``'CostCenter'``).
        days: Length of the look-back window, counted back from now.

    Returns:
        A DataFrame with columns ``date`` (period start, ``YYYY-MM-DD``),
        ``<tag_key>`` (the tag value; empty string for untagged spend) and
        ``cost`` (float). The columns are present even when no rows come back.

    Raises:
        ValueError: If ``days`` is less than 1, because the start date must
            be strictly before the end date.
    """
    if days < 1:
        raise ValueError(f"days must be >= 1, got {days}")

    client = boto3.client('ce', region_name='us-east-1')
    end_date = datetime.now()
    start_date = end_date - timedelta(days=days)

    request = {
        'TimePeriod': {
            'Start': start_date.strftime('%Y-%m-%d'),
            'End': end_date.strftime('%Y-%m-%d'),
        },
        'Granularity': 'DAILY',
        # Valid GroupBy types are DIMENSION, TAG and COST_CATEGORY.
        'GroupBy': [{'Type': 'TAG', 'Key': tag_key}],
        # Metrics is a list of metric names, not a list of dicts.
        'Metrics': ['UnblendedCost'],
    }

    costs = []
    while True:
        response = client.get_cost_and_usage(**request)
        for result in response['ResultsByTime']:
            for group in result['Groups']:
                # With a single GroupBy there is one key per group, shaped
                # like "<tag_key>$<tag_value>"; keep only the value part.
                raw_key = group['Keys'][0]
                tag_value = raw_key.split('$', 1)[1] if '$' in raw_key else raw_key
                costs.append({
                    'date': result['TimePeriod']['Start'],
                    tag_key: tag_value,
                    # Amount is returned as a string; convert it so that
                    # downstream sums add numbers instead of joining text.
                    'cost': float(group['Metrics']['UnblendedCost']['Amount']),
                })

        # Follow pagination until the service stops returning a token.
        next_token = response.get('NextPageToken')
        if not next_token:
            break
        request['NextPageToken'] = next_token

    # Explicit columns keep groupby(tag_key) working on an empty result.
    return pd.DataFrame(costs, columns=['date', tag_key, 'cost'])
# Example usage: total cost per cost center over the last 30 days
cost_by_cost_center = get_cost_by_tag(tag_key='CostCenter', days=30)
print(
    cost_by_cost_center
    .groupby('CostCenter')['cost']
    .sum()
)

Tag Automation

Automated Tagging

# Automated tag application
import boto3
def tag_untagged_resources():
    """Apply a default tag set to S3 and EC2 instance resources that have no tags.

    Pages through the Resource Groups Tagging API ``get_resources`` results,
    and for every resource whose tag list is empty applies placeholder tags
    so that its cost shows up under an explicit "unallocated" bucket.
    Prints one line per resource that was tagged or that failed to tag.

    NOTE(review): per the AWS documentation ``get_resources`` only returns
    resources that are, or previously were, tagged. Resources that have
    never been tagged may need to be discovered through the service APIs;
    confirm for your account.
    """
    # Service name is 'resourcegroupstaggingapi' (with the 's' in "groups").
    client = boto3.client('resourcegroupstaggingapi')

    default_tags = {
        'Environment': 'unknown',
        'CostCenter': 'unallocated',
        'Owner': 'auto-tagged',
        'CreatedAuto': 'true',
    }

    # ResourceTypeFilters takes plain strings of the form
    # "service[:resourceType]", not dicts or ARN-style patterns.
    paginator = client.get_paginator('get_resources')
    pages = paginator.paginate(ResourceTypeFilters=['s3', 'ec2:instance'])

    for page in pages:
        for resource in page['ResourceTagMappingList']:
            # Tags are already part of each mapping; no second lookup needed.
            if resource.get('Tags'):
                continue

            arn = resource['ResourceARN']
            # The write operation is ``tag_resources`` and takes a list of ARNs.
            response = client.tag_resources(
                ResourceARNList=[arn],
                Tags=default_tags,
            )

            # tag_resources reports per-resource failures in the response
            # body rather than raising, so check it before claiming success.
            failure = response.get('FailedResourcesMap', {}).get(arn)
            if failure:
                print(f"Failed to tag: {arn} ({failure.get('ErrorCode')})")
            else:
                print(f"Tagged: {arn}")
# Schedule with Airflow
from airflow import DAG
from airflow.operators.python import PythonOperator
from datetime import datetime
# Daily DAG with a single task that runs the auto-tagging function.
with DAG(
    dag_id='tag_untagged_resources',
    default_args={'owner': 'finops', 'start_date': datetime(2025, 1, 1)},
    schedule_interval='@daily',
    catchup=False,
) as dag:
    # Operators created inside the ``with`` block attach to ``dag``.
    tag_task = PythonOperator(
        task_id='tag_untagged',
        python_callable=tag_untagged_resources,
    )

Tag Best Practices

DO

# 1. Define tag taxonomy
# Standardize tag names and values
# 2. Make key tags required
# At minimum: CostCenter, Environment, Owner
# 3. Use tag policies
# Enforce tagging on resource creation
# 4. Automate tagging
# Auto-tag untagged resources
# 5. Document tag purpose
# Clear documentation for each tag

DON’T

# 1. Don't use too many tags
# Keep to roughly 10-50 tag keys (AWS allows at most 50 user-defined tags per resource)
# 2. Don't use inconsistent values
# Standardize tag values
# 3. Don't tag manually only
# Automation is essential
# 4. Don't ignore tag governance
# Enforce tag policies
# 5. Don't forget to document tags
# Documentation is critical

Key Takeaways

  1. Tag taxonomy: Standardize tag names and values
  2. Required tags: At minimum CostCenter, Environment, and Owner
  3. Cost attribution: Track costs by tag
  4. Enforcement: Tag policies for governance
  5. Automation: Auto-tag untagged resources
  6. Monitoring: Track untagged resources
  7. Chargeback: Enable cost allocation
  8. Use when: Apply tagging to all cloud resources, especially where cost tracking is needed

Back to Module 7