
AWS Backup Report Using Boto3

3 min read · Oct 20, 2024

Recently I had to create a report for AWS Backup listing all the resources protected by AWS Backup, the backup plan associated with each resource, and the backup schedule for those backup plans.

For this, I took ChatGPT's help to create the following Python script:

import boto3
import csv

# Initialize the Backup client
backup_client = boto3.client('backup')

# Function to get all backup vaults
def get_backup_vaults():
    vaults = []
    response = backup_client.list_backup_vaults()

    # Keep paginating if there are more vaults
    while True:
        vaults.extend(response['BackupVaultList'])
        if 'NextToken' in response:
            response = backup_client.list_backup_vaults(NextToken=response['NextToken'])
        else:
            break
    return [vault['BackupVaultName'] for vault in vaults]

# Function to get the backup plan name from BackupPlanId
def get_backup_plan_name(backup_plan_id):
    try:
        response = backup_client.get_backup_plan(BackupPlanId=backup_plan_id)
        return response['BackupPlan']['BackupPlanName']
    except Exception as e:
        print(f"Error retrieving Backup Plan Name for ID {backup_plan_id}: {e}")
        return "Unknown"

# Function to get all backup rules for a given BackupPlanId
def get_all_backup_rule_details(backup_plan_id):
    try:
        response = backup_client.get_backup_plan(BackupPlanId=backup_plan_id)
        rules = response['BackupPlan']['Rules']
        rule_details = [(rule['RuleName'], rule['ScheduleExpression']) for rule in rules]
        return rule_details
    except Exception as e:
        print(f"Error retrieving Backup Rules for ID {backup_plan_id}: {e}")
        return []

# Function to get the latest recovery point per resource for a given vault
def get_latest_recovery_points(vault_name):
    latest_recovery_points = {}
    response = backup_client.list_recovery_points_by_backup_vault(BackupVaultName=vault_name)

    # Keep paginating if there are more recovery points
    while True:
        # Process recovery points
        for recovery_point in response['RecoveryPoints']:
            resource_arn = recovery_point['ResourceArn']
            creation_date = recovery_point['CreationDate']

            # Get BackupPlanId and BackupRuleId, if available
            backup_plan_id = recovery_point.get('CreatedBy', {}).get('BackupPlanId', None)
            backup_rule_id = recovery_point.get('CreatedBy', {}).get('BackupRuleId', None)

            # If the resource is already in the dictionary, compare the creation dates
            if resource_arn in latest_recovery_points:
                existing_point = latest_recovery_points[resource_arn]
                # Keep the most recent recovery point
                if creation_date > existing_point['CreationDate']:
                    latest_recovery_points[resource_arn] = recovery_point
            else:
                # Add the recovery point if it's not already recorded
                latest_recovery_points[resource_arn] = recovery_point

        if 'NextToken' in response:
            response = backup_client.list_recovery_points_by_backup_vault(
                BackupVaultName=vault_name, NextToken=response['NextToken']
            )
        else:
            break

    return latest_recovery_points

# Function to get the resource type (service name) from a Resource ARN
def get_resource_type(resource_arn):
    # The service name is the third component of the ARN
    # Example: arn:aws:dynamodb:ap-southeast-1:123456789012:table/MyTable -> dynamodb
    try:
        return resource_arn.split(':')[2]  # Return the service name (e.g., dynamodb)
    except IndexError:
        return "Unknown"

# Prepare CSV output
output_file = 'latest_recovery_points.csv'

# Write to CSV
with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)

    # Get all backup vaults
    vault_names = get_backup_vaults()

    # Fixed header columns
    csv_headers = ["ResourceArn", "ResourceType", "LatestRecoveryPoint", "CreationDate", "BackupPlanId", "BackupPlanName"]

    # Set to collect all rule names for dynamic header generation
    rule_names = set()

    # First pass to collect all recovery points and rule names
    all_data = []

    for vault_name in vault_names:
        latest_recovery_points = get_latest_recovery_points(vault_name)

        for resource_arn, recovery_point in latest_recovery_points.items():
            backup_plan_id = recovery_point.get('CreatedBy', {}).get('BackupPlanId', None)

            if backup_plan_id is not None:
                backup_plan_name = get_backup_plan_name(backup_plan_id)  # Get the backup plan name
                backup_rules = get_all_backup_rule_details(backup_plan_id)  # Get all rule names and schedules
                resource_type = get_resource_type(resource_arn)  # Get resource type

                # Collect rule names for dynamic header generation
                for rule_name, rule_schedule in backup_rules:
                    rule_names.add(rule_name)

                all_data.append([
                    resource_arn,
                    resource_type,
                    recovery_point['RecoveryPointArn'],
                    recovery_point['CreationDate'],
                    backup_plan_id,
                    backup_plan_name,
                    backup_rules  # Store all backup rules for this resource
                ])

    # Generate dynamic CSV headers for rules
    for rule_name in rule_names:
        csv_headers.append(f"{rule_name} Schedule")

    # Write headers
    writer.writerow(csv_headers)

    # Second pass to write data, including rules in columns
    for data in all_data:
        resource_rules = dict(data[6])  # Rules were already collected in the first pass
        row = data[:6]  # Base row without rules

        # Append the schedule for each rule as a column
        for rule_name in rule_names:
            row.append(resource_rules.get(rule_name, "N/A"))  # Use "N/A" if the rule is not found

        writer.writerow(row)

print(f"Latest recovery points with BackupPlanId written to {output_file}.")

After executing the script, I got my report in the following format (I have obfuscated ARNs and other sensitive values).

[Screenshot of the generated CSV report]
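
For reference, the columns follow the headers built in the script: the six fixed columns plus one "<RuleName> Schedule" column for every backup rule found across the plans. With a single hypothetical rule called DailyRule, a row would look roughly like this (all values below are made up for illustration):

ResourceArn,ResourceType,LatestRecoveryPoint,CreationDate,BackupPlanId,BackupPlanName,DailyRule Schedule
arn:aws:dynamodb:<region>:<account-id>:table/<table-name>,dynamodb,<recovery-point-arn>,2024-10-19 01:00:00+00:00,<backup-plan-id>,MyBackupPlan,cron(0 1 * * ? *)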

All done and dusted in less than 1 hour, thanks to ChatGPT :)


Written by Vinayak Pandey

Experienced Cloud Engineer with a knack for automation. LinkedIn profile: https://www.linkedin.com/in/vinayakpandeyit/
