- Remove duplicate shebang, set -e, and redundant SSM agent install from user data script so cfn-signal always runs (root cause of "0 SUCCESS signals" deploy failure)
- Remove DNS record creation from service stack's configureBlueGreenDns() to avoid CloudFormation conflicts with the persistent ALB stack that owns those records
- Replace readonly type assertion hacks with direct property assignments on 6 ALB/listener fields
- Change default health check path from /health to / for universal compatibility

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
957 lines
33 KiB
TypeScript
957 lines
33 KiB
TypeScript
import * as autoscaling from 'aws-cdk-lib/aws-autoscaling';
|
|
import * as ec2 from 'aws-cdk-lib/aws-ec2';
|
|
import * as ecs from 'aws-cdk-lib/aws-ecs';
|
|
import * as elbv2 from 'aws-cdk-lib/aws-elasticloadbalancingv2';
|
|
import * as iam from 'aws-cdk-lib/aws-iam';
|
|
import * as lambda from 'aws-cdk-lib/aws-lambda';
|
|
import * as logs from 'aws-cdk-lib/aws-logs';
|
|
import * as s3 from 'aws-cdk-lib/aws-s3';
|
|
import * as sns from 'aws-cdk-lib/aws-sns';
|
|
import * as subscriptions from 'aws-cdk-lib/aws-sns-subscriptions';
|
|
import * as cdk from 'aws-cdk-lib/core';
|
|
import { Construct } from 'constructs';
|
|
|
|
/**
 * Tag values applied to the ECS cluster construct and the resources it creates.
 *
 * Each field becomes the value of the same-named (capitalised) tag.
 */
export interface SpicyEcsClusterTags {
  /** Value of the `Owner` tag. */
  owner: string;
  /** Value of the `Product` tag. */
  product: string;
  /** Value of the `Component` tag. */
  component: string;
  /** Value of the `Environment` tag. */
  environment: string;
  /** Value of the `Build` tag; the tag is only added when this is a non-empty string. */
  build?: string;
}
|
|
|
|
/**
 * Capacity bounds and managed-scaling target for the cluster's Auto Scaling Group.
 */
export interface ClusterScalingConfig {
  /**
   * Minimum number of instances.
   * @default 2
   */
  minCapacity?: number;

  /**
   * Maximum number of instances.
   * @default 4
   */
  maxCapacity?: number;

  /**
   * Target capacity utilization percentage for managed scaling (0-100).
   * @default 100
   */
  targetCapacityPercent?: number;

  /**
   * Cooldown period in seconds after scale up.
   *
   * NOTE(review): SpicyEcsCluster defaults this to 60 but does not appear to
   * pass it to any resource - confirm whether it is meant to be wired up.
   * @default 60
   */
  scaleUpCooldown?: number;

  /**
   * Cooldown period in seconds after scale down.
   *
   * NOTE(review): SpicyEcsCluster defaults this to 300 but does not appear to
   * pass it to any resource - confirm whether it is meant to be wired up.
   * @default 300
   */
  scaleDownCooldown?: number;
}
|
|
|
|
/**
 * Controls whether (and how) the Auto Scaling Group mixes Spot instances
 * with On-Demand instances.
 */
export interface SpotConfig {
  /** Turn the mixed instances policy on; nothing else here applies when false. */
  enabled: boolean;

  /**
   * Percentage of On-Demand instances to maintain (0-100).
   * @default 100
   */
  onDemandPercentage?: number;

  /**
   * Spot allocation strategy.
   * @default 'capacity-optimized'
   */
  spotAllocationStrategy?: 'lowest-price' | 'capacity-optimized' | 'capacity-optimized-prioritized';

  /** Maximum Spot price (leave undefined for on-demand price). */
  maxSpotPrice?: string;
}
|
|
|
|
/**
 * Selects which Application Load Balancers the cluster creates and how they
 * are configured.
 */
export interface LoadBalancerConfig {
  /**
   * Create an internet-facing load balancer.
   * @default false
   */
  createExternal?: boolean;

  /**
   * Create an internal load balancer.
   * @default false
   */
  createInternal?: boolean;

  /** SSL certificate ARN for HTTPS listeners; no HTTPS listener is created without it. */
  certificateArn?: string;

  /**
   * Idle timeout in seconds.
   * @default 60
   */
  idleTimeout?: number;

  /**
   * Enable access logging.
   * @default true
   */
  enableAccessLogs?: boolean;
}
|
|
|
|
/**
 * Construction properties for SpicyEcsCluster.
 */
export interface SpicyEcsClusterProps {
  /** VPC that hosts the cluster, its instances and its load balancers. */
  readonly vpc: ec2.IVpc;

  /**
   * CIDR block of the VPC, used for the "allow internal VPC traffic" ingress
   * rule. Supply it for imported VPCs.
   * @default vpc.vpcCidrBlock
   */
  readonly vpcCidrBlock?: string;

  /**
   * Subnets the EC2 container instances are launched into (private subnets recommended).
   * @default subnets of type PRIVATE_WITH_EGRESS
   */
  readonly instanceSubnets?: ec2.SubnetSelection;

  /**
   * Subnets for the external (internet-facing) load balancer.
   * @default subnets of type PUBLIC
   */
  readonly externalSubnets?: ec2.SubnetSelection;

  /**
   * Subnets for the internal load balancer.
   * @default subnets of type PRIVATE_WITH_EGRESS
   */
  readonly internalSubnets?: ec2.SubnetSelection;

  /**
   * EC2 instance type for the container instances.
   * @default m5a.large
   */
  readonly instanceType?: ec2.InstanceType;

  /**
   * Extra instance types offered to the mixed instances policy in addition
   * to `instanceType`. Only consulted when Spot is enabled.
   */
  readonly additionalInstanceTypes?: ec2.InstanceType[];

  /** Name of an existing EC2 key pair for SSH access. */
  readonly keyName?: string;

  /**
   * Size of the root EBS volume in GB.
   * @default 100
   */
  readonly ebsVolumeSize?: number;

  /**
   * Enable Container Insights on the cluster.
   * @default true
   */
  readonly containerInsights?: boolean;

  /** Capacity limits and managed-scaling target. */
  readonly scaling?: ClusterScalingConfig;

  /** Spot instance configuration. */
  readonly spot?: SpotConfig;

  /** Which load balancers to create and how to configure them. */
  readonly loadBalancer?: LoadBalancerConfig;

  /** Security groups attached to the instances in addition to the ECS host group. */
  readonly additionalSecurityGroups?: ec2.ISecurityGroup[];

  /**
   * Enable Fargate capacity providers (adds both FARGATE and FARGATE_SPOT).
   * @default false
   */
  readonly enableFargate?: boolean;

  /**
   * Seconds the draining Lambda waits for tasks to stop before it lets the
   * instance terminate anyway.
   * @default 900
   */
  readonly drainingTimeout?: number;

  /**
   * Maximum instance lifetime in seconds.
   * @default 604800 (7 days)
   */
  readonly maxInstanceLifetime?: number;

  /**
   * ELB account ID granted write access to the access-logs bucket (region-specific).
   * See: https://docs.aws.amazon.com/elasticloadbalancing/latest/classic/enable-access-logs.html
   * @default looked up from the stack region in the built-in table
   */
  readonly elbAccountId?: string;

  /**
   * Name of the S3 bucket created for ALB access logs.
   * @default `<stack name>-<region>-alb-logs`
   */
  readonly logsBucketName?: string;

  /**
   * Name of the stack that owns the VPC. When set it is re-exported as
   * `<stack name>-VPCStackName` so other stacks can import VPC details.
   */
  readonly vpcStackName?: string;

  /** Required tags for the cluster and its resources. */
  readonly tags: SpicyEcsClusterTags;
}
|
|
|
|
/**
 * Region -> AWS-owned Elastic Load Balancing account ID.
 *
 * The account is the principal that writes ALB access logs into the logs
 * bucket. Regions not listed here have no entry.
 */
const ELB_ACCOUNT_IDS: Record<string, string> = {
  // North America
  'us-east-1': '127311923021',
  'us-east-2': '033677994240',
  'us-west-1': '027434742980',
  'us-west-2': '797873946194',
  'ca-central-1': '985666609251',

  // Europe
  'eu-west-1': '156460612806',
  'eu-west-2': '652711504416',
  'eu-west-3': '009996457667',
  'eu-central-1': '054676820928',

  // Asia Pacific
  'ap-northeast-1': '582318560864',
  'ap-northeast-2': '600734575887',
  'ap-southeast-1': '114774131450',
  'ap-southeast-2': '783225319266',
  'ap-south-1': '718504428378',

  // South America
  'sa-east-1': '507241528517',
};
|
|
|
|
/**
 * SpicyEcsCluster - an EC2-backed ECS cluster with:
 * - EC2 Capacity Provider with managed scaling
 * - Optional Fargate capacity providers
 * - Mixed instances policy for Spot support
 * - Instance draining on termination (lifecycle hook -> SNS -> Lambda)
 * - Optional internal/external ALBs
 * - Container Insights
 * - Launch Templates with IMDSv2 and gp3 volumes
 *
 * Most underlying CloudFormation resources and all outputs are given fixed
 * logical IDs via `overrideLogicalId`, and exports are prefixed with the stack
 * name, so the construct is intended to be instantiated once per stack.
 */
export class SpicyEcsCluster extends Construct {
  /** The ECS cluster */
  public readonly cluster: ecs.Cluster;

  /** The Auto Scaling Group that provides the container instances */
  public readonly autoScalingGroup: autoscaling.AutoScalingGroup;

  /** EC2 Capacity Provider wrapping the Auto Scaling Group */
  public readonly ec2CapacityProvider: ecs.AsgCapacityProvider;

  /** External (internet-facing) load balancer; set only when `loadBalancer.createExternal` is true */
  public externalLoadBalancer?: elbv2.ApplicationLoadBalancer;

  /** Internal load balancer; set only when `loadBalancer.createInternal` is true */
  public internalLoadBalancer?: elbv2.ApplicationLoadBalancer;

  /** External HTTPS listener; set only when a certificate ARN was supplied */
  public externalHttpsListener?: elbv2.ApplicationListener;

  /** External HTTP listener */
  public externalHttpListener?: elbv2.ApplicationListener;

  /** Internal HTTPS listener; set only when a certificate ARN was supplied */
  public internalHttpsListener?: elbv2.ApplicationListener;

  /** Internal HTTP listener */
  public internalHttpListener?: elbv2.ApplicationListener;

  /** ECS Host security group */
  public readonly ecsHostSecurityGroup: ec2.SecurityGroup;

  /** Security group shared by both load balancers; set only when at least one ALB is created */
  public readonly loadBalancerSecurityGroup?: ec2.SecurityGroup;

  /** S3 bucket for ALB access logs; set only when an ALB is created with access logs enabled */
  public readonly logsBucket?: s3.Bucket;

  /**
   * @param scope parent construct
   * @param id construct ID
   * @param props cluster configuration; see {@link SpicyEcsClusterProps}
   * @throws Error when Spot is enabled but the Auto Scaling Group has no launch template
   */
  constructor(scope: Construct, id: string, props: SpicyEcsClusterProps) {
    super(scope, id);

    const stack = cdk.Stack.of(this);
    const region = stack.region;

    // Default values
    const instanceType = props.instanceType ?? ec2.InstanceType.of(ec2.InstanceClass.M5A, ec2.InstanceSize.LARGE);
    const ebsVolumeSize = props.ebsVolumeSize ?? 100;
    const containerInsights = props.containerInsights ?? true;
    const drainingTimeout = props.drainingTimeout ?? 900;
    const maxInstanceLifetime = props.maxInstanceLifetime ?? 604800;

    // NOTE(review): scaling.scaleUpCooldown / scaleDownCooldown are accepted by
    // ClusterScalingConfig but are not applied to any resource here.
    const scaling = {
      minCapacity: props.scaling?.minCapacity ?? 2,
      maxCapacity: props.scaling?.maxCapacity ?? 4,
      targetCapacityPercent: props.scaling?.targetCapacityPercent ?? 100,
    };

    const spot = {
      enabled: props.spot?.enabled ?? false,
      onDemandPercentage: props.spot?.onDemandPercentage ?? 100,
      spotAllocationStrategy: props.spot?.spotAllocationStrategy ?? 'capacity-optimized',
    };

    const loadBalancer = {
      createExternal: props.loadBalancer?.createExternal ?? false,
      createInternal: props.loadBalancer?.createInternal ?? false,
      certificateArn: props.loadBalancer?.certificateArn,
      idleTimeout: props.loadBalancer?.idleTimeout ?? 60,
      enableAccessLogs: props.loadBalancer?.enableAccessLogs ?? true,
    };

    // Create ECS Cluster (named after the stack; Fargate is opted into further down)
    this.cluster = new ecs.Cluster(this, 'Cluster', {
      vpc: props.vpc,
      clusterName: stack.stackName,
      enableFargateCapacityProviders: false,
    });
    const cfnCluster = this.cluster.node.defaultChild as ecs.CfnCluster;

    // Enable Container Insights if requested
    if (containerInsights) {
      cfnCluster.configuration = {
        executeCommandConfiguration: {
          logging: 'DEFAULT',
        },
      };
      cfnCluster.addPropertyOverride('ClusterSettings', [
        {
          Name: 'containerInsights',
          Value: 'enabled',
        },
      ]);
    }
    cfnCluster.overrideLogicalId('Cluster');

    // Security Groups
    this.ecsHostSecurityGroup = new ec2.SecurityGroup(this, 'ECSHostSecurityGroup', {
      vpc: props.vpc,
      description: 'Security group for ECS container instances',
      allowAllOutbound: true,
    });
    (this.ecsHostSecurityGroup.node.defaultChild as ec2.CfnSecurityGroup).overrideLogicalId('ECSHostSecurityGroup');

    // Allow internal VPC traffic
    const vpcCidrBlock = props.vpcCidrBlock ?? props.vpc.vpcCidrBlock;
    if (vpcCidrBlock) {
      this.ecsHostSecurityGroup.addIngressRule(
        ec2.Peer.ipv4(vpcCidrBlock),
        ec2.Port.allTraffic(),
        'Allow internal VPC traffic'
      );
    }

    // IAM Role for EC2 instances
    const instanceRole = new iam.Role(this, 'InstanceRole', {
      assumedBy: new iam.ServicePrincipal('ec2.amazonaws.com'),
      managedPolicies: [
        iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AmazonEC2ContainerServiceforEC2Role'),
        iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonSSMManagedInstanceCore'),
      ],
    });
    (instanceRole.node.defaultChild as iam.CfnRole).overrideLogicalId('InstanceRole');

    // Additional permissions for ECS instances
    instanceRole.addToPolicy(
      new iam.PolicyStatement({
        actions: [
          'logs:CreateLogGroup',
          'logs:CreateLogStream',
          'logs:PutLogEvents',
          'logs:DescribeLogGroups',
          'logs:DescribeLogStreams',
        ],
        resources: ['*'],
      })
    );

    instanceRole.addToPolicy(
      new iam.PolicyStatement({
        actions: ['ec2:DescribeVolumes', 'ec2:CreateTags'],
        resources: ['*'],
      })
    );

    instanceRole.addToPolicy(
      new iam.PolicyStatement({
        actions: ['ecs:UpdateContainerInstancesState'],
        resources: ['*'],
      })
    );

    // User data script. The strings below end up verbatim in the launch
    // template, so any edit to them changes the instances' user data.
    // cfn-signal targets the logical ID 'AutoScalingGroup', which is pinned
    // by the overrideLogicalId call after the ASG is created; `$?` is the exit
    // status of the command immediately before it.
    const userData = ec2.UserData.forLinux();
    userData.addCommands(
      '# Configure ECS agent',
      `echo "ECS_CLUSTER=${stack.stackName}" >> /etc/ecs/ecs.config`,
      'echo "ECS_ENABLE_SPOT_INSTANCE_DRAINING=true" >> /etc/ecs/ecs.config',
      'echo "ECS_ENABLE_CONTAINER_METADATA=true" >> /etc/ecs/ecs.config',
      'echo \'ECS_AVAILABLE_LOGGING_DRIVERS=["json-file","awslogs","splunk"]\' >> /etc/ecs/ecs.config',
      '',
      '# Signal CloudFormation (always runs, reports actual exit code)',
      `/opt/aws/bin/cfn-signal -e $? --stack ${stack.stackName} --resource AutoScalingGroup --region ${region}`
    );

    // Create Auto Scaling Group with Launch Template
    this.autoScalingGroup = new autoscaling.AutoScalingGroup(this, 'AutoScalingGroup', {
      vpc: props.vpc,
      vpcSubnets: props.instanceSubnets ?? { subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS },
      instanceType: instanceType,
      machineImage: ecs.EcsOptimizedImage.amazonLinux2(),
      role: instanceRole,
      minCapacity: scaling.minCapacity,
      maxCapacity: scaling.maxCapacity,
      maxInstanceLifetime: cdk.Duration.seconds(maxInstanceLifetime),
      healthChecks: autoscaling.HealthChecks.ec2({
        gracePeriod: cdk.Duration.minutes(5),
      }),
      updatePolicy: autoscaling.UpdatePolicy.rollingUpdate({
        maxBatchSize: 1,
        minInstancesInService: 1,
        pauseTime: cdk.Duration.minutes(5),
      }),
      signals: autoscaling.Signals.waitForMinCapacity({
        timeout: cdk.Duration.minutes(15),
      }),
      userData: userData,
      blockDevices: [
        {
          deviceName: '/dev/xvda',
          volume: autoscaling.BlockDeviceVolume.ebs(ebsVolumeSize, {
            volumeType: autoscaling.EbsDeviceVolumeType.GP3,
            encrypted: true,
          }),
        },
      ],
      keyPair: props.keyName ? ec2.KeyPair.fromKeyPairName(this, 'KeyPair', props.keyName) : undefined,
      securityGroup: this.ecsHostSecurityGroup,
    });
    const cfnAsg = this.autoScalingGroup.node.defaultChild as autoscaling.CfnAutoScalingGroup;
    cfnAsg.overrideLogicalId('AutoScalingGroup');

    // Add additional security groups
    for (const sg of props.additionalSecurityGroups ?? []) {
      this.autoScalingGroup.addSecurityGroup(sg);
    }

    // The ASG only has a 'LaunchTemplate' child when CDK generated a launch
    // template for it; look it up once and reuse it below.
    const asgLaunchTemplate = this.autoScalingGroup.node.tryFindChild('LaunchTemplate');
    const cfnLaunchTemplate = asgLaunchTemplate?.node.defaultChild as ec2.CfnLaunchTemplate | undefined;

    // Configure Mixed Instances Policy for Spot support
    if (spot.enabled) {
      if (!cfnLaunchTemplate) {
        throw new Error(
          'Spot support needs the Auto Scaling Group to be backed by a launch template, but no ' +
            "'LaunchTemplate' child was found on it (check that the " +
            "'@aws-cdk/aws-autoscaling:generateLaunchTemplateInsteadOfConfig' feature flag is enabled)"
        );
      }

      // Build instance type overrides: the primary type first, then any extras
      const instanceTypes = [instanceType, ...(props.additionalInstanceTypes ?? [])];
      const overrides = instanceTypes.map((it) => ({
        instanceType: it.toString(),
      }));

      cfnAsg.mixedInstancesPolicy = {
        launchTemplate: {
          launchTemplateSpecification: {
            launchTemplateId: cfnLaunchTemplate.ref,
            version: cfnLaunchTemplate.attrLatestVersionNumber,
          },
          overrides: overrides,
        },
        instancesDistribution: {
          onDemandBaseCapacity: 0,
          onDemandPercentageAboveBaseCapacity: spot.onDemandPercentage,
          spotAllocationStrategy: spot.spotAllocationStrategy,
          spotMaxPrice: props.spot?.maxSpotPrice,
        },
      };

      // Remove the direct launch template reference since we're using mixed instances
      cfnAsg.launchTemplate = undefined;
      cfnAsg.launchConfigurationName = undefined;
    }

    // Enforce IMDSv2 and pin the launch template's logical ID
    if (cfnLaunchTemplate) {
      cfnLaunchTemplate.addPropertyOverride('LaunchTemplateData.MetadataOptions', {
        HttpTokens: 'required',
        HttpPutResponseHopLimit: 2,
        HttpEndpoint: 'enabled',
      });
      cfnLaunchTemplate.overrideLogicalId('LaunchTemplate');
    }

    // Create EC2 Capacity Provider with managed scaling
    this.ec2CapacityProvider = new ecs.AsgCapacityProvider(this, 'EC2CapacityProvider', {
      autoScalingGroup: this.autoScalingGroup,
      enableManagedScaling: true,
      enableManagedTerminationProtection: true,
      targetCapacityPercent: scaling.targetCapacityPercent,
      capacityProviderName: `${stack.stackName}-ec2`,
    });

    // Override capacity provider logical ID (the child is named 'EC2CapacityProvider')
    const cfnCapacityProvider = this.ec2CapacityProvider.node.tryFindChild('EC2CapacityProvider') as
      | ecs.CfnCapacityProvider
      | undefined;
    cfnCapacityProvider?.overrideLogicalId('EC2CapacityProvider');

    // Override the Instance Profile logical ID when the ASG owns one directly
    const instanceProfile = this.autoScalingGroup.node.tryFindChild('InstanceProfile') as
      | iam.CfnInstanceProfile
      | undefined;
    instanceProfile?.overrideLogicalId('InstanceProfile');

    // Add EC2 capacity provider to cluster
    this.cluster.addAsgCapacityProvider(this.ec2CapacityProvider);

    // Add Fargate capacity providers if enabled
    if (props.enableFargate) {
      this.cluster.enableFargateCapacityProviders();
    }

    // Instance draining Lambda
    this.createDrainingLambda(drainingTimeout);

    // Load Balancers
    if (loadBalancer.createExternal || loadBalancer.createInternal) {
      this.loadBalancerSecurityGroup = new ec2.SecurityGroup(this, 'LoadBalancerSecurityGroup', {
        vpc: props.vpc,
        description: 'Security group for ALBs',
        allowAllOutbound: true,
      });
      (this.loadBalancerSecurityGroup.node.defaultChild as ec2.CfnSecurityGroup).overrideLogicalId(
        'LoadBalancerSecurityGroup'
      );

      // Allow ALB to communicate with ECS hosts
      this.ecsHostSecurityGroup.addIngressRule(
        this.loadBalancerSecurityGroup,
        ec2.Port.allTraffic(),
        'Allow traffic from ALB'
      );

      // Create logs bucket if access logs enabled
      if (loadBalancer.enableAccessLogs) {
        this.logsBucket = new s3.Bucket(this, 'LogsBucket', {
          bucketName: props.logsBucketName ?? `${stack.stackName}-${region}-alb-logs`,
          encryption: s3.BucketEncryption.S3_MANAGED,
          blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
          lifecycleRules: [
            {
              expiration: cdk.Duration.days(365),
            },
          ],
          removalPolicy: cdk.RemovalPolicy.RETAIN,
        });
        (this.logsBucket.node.defaultChild as s3.CfnBucket).overrideLogicalId('LogsBucket');

        // Grant the regional ELB account write access. When the region is not
        // in the table (or is an unresolved token) and no override was given,
        // skip the statement instead of granting access to another region's
        // account; logAccessLogs() on the ALB adds its own bucket policy.
        const elbAccountId = props.elbAccountId ?? ELB_ACCOUNT_IDS[region];
        if (elbAccountId) {
          this.logsBucket.addToResourcePolicy(
            new iam.PolicyStatement({
              actions: ['s3:PutObject'],
              resources: [`${this.logsBucket.bucketArn}/*`],
              principals: [new iam.AccountPrincipal(elbAccountId)],
            })
          );
        }
      }

      if (loadBalancer.createExternal) {
        this.createExternalLoadBalancer(props, loadBalancer);
      }

      if (loadBalancer.createInternal) {
        this.createInternalLoadBalancer(props, loadBalancer);
      }
    }

    // Apply tags
    this.applyTags(props.tags);

    // Add outputs
    this.addOutputs(props);
  }

  /**
   * Create the instance-draining pipeline: a termination lifecycle hook on the
   * Auto Scaling Group publishes to an SNS topic, which invokes a Lambda that
   * puts the container instance into DRAINING and completes the lifecycle
   * action once no tasks are running or the timeout has passed.
   *
   * @param drainingTimeout seconds to wait for tasks to stop before letting the instance terminate
   */
  private createDrainingLambda(drainingTimeout: number): void {
    const stack = cdk.Stack.of(this);

    // SNS Topic for ASG lifecycle events
    const lifecycleTopic = new sns.Topic(this, 'LifecycleTopic', {
      displayName: `${stack.stackName}-lifecycle`,
    });
    (lifecycleTopic.node.defaultChild as sns.CfnTopic).overrideLogicalId('LifecycleTopic');

    // Lambda execution role
    const lambdaRole = new iam.Role(this, 'DrainingLambdaRole', {
      assumedBy: new iam.ServicePrincipal('lambda.amazonaws.com'),
      managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSLambdaBasicExecutionRole')],
    });
    (lambdaRole.node.defaultChild as iam.CfnRole).overrideLogicalId('DrainingLambdaRole');

    lambdaRole.addToPolicy(
      new iam.PolicyStatement({
        actions: [
          'autoscaling:CompleteLifecycleAction',
          'autoscaling:RecordLifecycleActionHeartbeat',
          'ecs:ListContainerInstances',
          'ecs:DescribeContainerInstances',
          'ecs:UpdateContainerInstancesState',
          'sns:Publish',
        ],
        resources: ['*'],
      })
    );

    // Draining Lambda function. The handler re-publishes the message to the
    // same topic (after a 20s sleep and a lifecycle heartbeat) while tasks are
    // still running, so each invocation stays well inside the 60s timeout.
    // The Python source must stay at column 0: its indentation is significant.
    const drainingLambda = new lambda.Function(this, 'DrainingLambda', {
      runtime: lambda.Runtime.PYTHON_3_11,
      handler: 'index.lambda_handler',
      role: lambdaRole,
      timeout: cdk.Duration.seconds(60),
      memorySize: 128,
      description: 'Gracefully drain ECS tasks before instance termination',
      logGroup: new logs.LogGroup(this, 'DrainingLambdaLogGroup', {
        retention: logs.RetentionDays.ONE_WEEK,
        removalPolicy: cdk.RemovalPolicy.DESTROY,
      }),
      code: lambda.Code.fromInline(`
import datetime
import json
import time
import boto3

CLUSTER = '${stack.stackName}'
TIMEOUT = ${drainingTimeout}
REGION = '${stack.region}'

def aws(svc):
    return boto3.client(svc, region_name=REGION)

ASG = aws('autoscaling')
ECS = aws('ecs')
SNS = aws('sns')

def lookup_instance(msg):
    res = ECS.list_container_instances(cluster=CLUSTER, filter='ec2InstanceId == %s' % (msg['EC2InstanceId']))
    if not res['containerInstanceArns']:
        return None, None, 0
    res = ECS.describe_container_instances(cluster=CLUSTER, containerInstances=res['containerInstanceArns'])
    ret = (res['containerInstances'][0]['containerInstanceArn'], res['containerInstances'][0]['status'], res['containerInstances'][0]['runningTasksCount'])
    print('Found: %s %s' % (str(ret), msg))
    return ret

def can_terminate(msg):
    (arn, status, count) = lookup_instance(msg)
    if arn is None:
        print('Cannot lookup: %s' % (msg))
        return True
    if status != 'DRAINING':
        print('Draining: %s' % (msg))
        ECS.update_container_instances_state(cluster=CLUSTER, containerInstances=[arn], status='DRAINING')
        return False
    if count == 0:
        print('Finished draining: %s' % (msg))
        return True
    now = datetime.datetime.now().timestamp()
    if msg['instance_timeout'] < now:
        print('Timed out: %s' % (msg))
        return True
    return False

def lambda_handler(event, context):
    msg = json.loads(event['Records'][0]['Sns']['Message'])
    if 'instance_timeout' not in msg:
        msg['instance_timeout'] = (datetime.datetime.now() + datetime.timedelta(seconds=TIMEOUT)).timestamp()
    if 'LifecycleTransition' not in msg.keys() or msg['LifecycleTransition'].find('autoscaling:EC2_INSTANCE_TERMINATING') == -1:
        print('Unknown transition: %s' % (msg))
        return
    if can_terminate(msg):
        print('ASG complete: %s' % (msg))
        ASG.complete_lifecycle_action(LifecycleHookName=msg['LifecycleHookName'], AutoScalingGroupName=msg['AutoScalingGroupName'], LifecycleActionResult='CONTINUE', InstanceId=msg['EC2InstanceId'])
        return
    print('Tasks are still running: %s' % (msg))
    time.sleep(20)
    ASG.record_lifecycle_action_heartbeat(LifecycleHookName=msg['LifecycleHookName'], AutoScalingGroupName=msg['AutoScalingGroupName'], LifecycleActionToken=msg['LifecycleActionToken'], InstanceId=msg['EC2InstanceId'])
    SNS.publish(TopicArn=event['Records'][0]['Sns']['TopicArn'], Message=json.dumps(msg), Subject='Retry')
`),
    });
    (drainingLambda.node.defaultChild as lambda.CfnFunction).overrideLogicalId('DrainingLambda');

    // Subscribe Lambda to SNS topic
    lifecycleTopic.addSubscription(new subscriptions.LambdaSubscription(drainingLambda));

    // Role Auto Scaling assumes to publish lifecycle notifications
    const lifecycleRole = new iam.Role(this, 'LifecycleRole', {
      assumedBy: new iam.ServicePrincipal('autoscaling.amazonaws.com'),
      managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AutoScalingNotificationAccessRole')],
    });
    (lifecycleRole.node.defaultChild as iam.CfnRole).overrideLogicalId('LifecycleRole');

    // Add lifecycle hook; the inline target hands the hook our own topic and role
    this.autoScalingGroup.addLifecycleHook('TerminationHook', {
      lifecycleTransition: autoscaling.LifecycleTransition.INSTANCE_TERMINATING,
      notificationTarget: {
        bind: (): autoscaling.LifecycleHookTargetConfig => ({
          notificationTargetArn: lifecycleTopic.topicArn,
          createdRole: lifecycleRole,
        }),
      },
      defaultResult: autoscaling.DefaultResult.ABANDON,
      heartbeatTimeout: cdk.Duration.seconds(120),
    });
  }

  /**
   * Create the external (internet-facing) load balancer with an HTTP listener
   * and, when a certificate ARN is configured, an HTTPS listener.
   *
   * @param props construct props (VPC and subnet selection)
   * @param config resolved load balancer settings
   * @throws Error when the shared load balancer security group has not been created yet
   */
  private createExternalLoadBalancer(
    props: SpicyEcsClusterProps,
    config: { certificateArn?: string; idleTimeout: number; enableAccessLogs: boolean }
  ): void {
    const securityGroup = this.loadBalancerSecurityGroup;
    if (!securityGroup) {
      throw new Error('Load balancer security group must be created before the external load balancer');
    }
    securityGroup.addIngressRule(ec2.Peer.anyIpv4(), ec2.Port.tcp(80), 'Allow HTTP');
    securityGroup.addIngressRule(ec2.Peer.anyIpv4(), ec2.Port.tcp(443), 'Allow HTTPS');

    const alb = new elbv2.ApplicationLoadBalancer(this, 'ExternalLoadBalancer', {
      vpc: props.vpc,
      internetFacing: true,
      securityGroup: securityGroup,
      vpcSubnets: props.externalSubnets ?? { subnetType: ec2.SubnetType.PUBLIC },
      idleTimeout: cdk.Duration.seconds(config.idleTimeout),
    });
    (alb.node.defaultChild as elbv2.CfnLoadBalancer).overrideLogicalId('ExternalLoadBalancer');
    this.externalLoadBalancer = alb;

    if (config.enableAccessLogs && this.logsBucket) {
      alb.logAccessLogs(this.logsBucket, 'external');
    }

    // HTTP Listener
    this.externalHttpListener = alb.addListener('ExternalHTTP', {
      port: 80,
      protocol: elbv2.ApplicationProtocol.HTTP,
      defaultAction: SpicyEcsCluster.notFoundAction(),
    });

    // HTTPS Listener (if certificate provided)
    if (config.certificateArn) {
      this.externalHttpsListener = alb.addListener('ExternalHTTPS', {
        port: 443,
        protocol: elbv2.ApplicationProtocol.HTTPS,
        certificates: [elbv2.ListenerCertificate.fromArn(config.certificateArn)],
        defaultAction: SpicyEcsCluster.notFoundAction(),
      });
    }
  }

  /**
   * Create the internal load balancer with an HTTP listener and, when a
   * certificate ARN is configured, an HTTPS listener.
   *
   * @param props construct props (VPC and subnet selection)
   * @param config resolved load balancer settings
   */
  private createInternalLoadBalancer(
    props: SpicyEcsClusterProps,
    config: { certificateArn?: string; idleTimeout: number; enableAccessLogs: boolean }
  ): void {
    const alb = new elbv2.ApplicationLoadBalancer(this, 'InternalLoadBalancer', {
      vpc: props.vpc,
      internetFacing: false,
      securityGroup: this.loadBalancerSecurityGroup,
      vpcSubnets: props.internalSubnets ?? { subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS },
      idleTimeout: cdk.Duration.seconds(config.idleTimeout),
    });
    (alb.node.defaultChild as elbv2.CfnLoadBalancer).overrideLogicalId('InternalLoadBalancer');
    this.internalLoadBalancer = alb;

    if (config.enableAccessLogs && this.logsBucket) {
      alb.logAccessLogs(this.logsBucket, 'internal');
    }

    // HTTP Listener
    this.internalHttpListener = alb.addListener('InternalHTTP', {
      port: 80,
      protocol: elbv2.ApplicationProtocol.HTTP,
      defaultAction: SpicyEcsCluster.notFoundAction(),
    });

    // HTTPS Listener (if certificate provided)
    if (config.certificateArn) {
      this.internalHttpsListener = alb.addListener('InternalHTTPS', {
        port: 443,
        protocol: elbv2.ApplicationProtocol.HTTPS,
        certificates: [elbv2.ListenerCertificate.fromArn(config.certificateArn)],
        defaultAction: SpicyEcsCluster.notFoundAction(),
      });
    }
  }

  /**
   * Apply the standard tag set to this construct (and therefore to every
   * taggable resource beneath it). `Name` is always the stack name; `Build`
   * is only added when a non-empty value was supplied.
   *
   * @param tags tag values supplied by the caller
   */
  private applyTags(tags: SpicyEcsClusterTags): void {
    const stack = cdk.Stack.of(this);
    const tagManager = cdk.Tags.of(this);

    tagManager.add('Name', stack.stackName);
    tagManager.add('Owner', tags.owner);
    tagManager.add('Product', tags.product);
    tagManager.add('Component', tags.component);
    tagManager.add('Environment', tags.environment);
    if (tags.build) {
      tagManager.add('Build', tags.build);
    }
  }

  /**
   * Add CloudFormation outputs (each exported as `<stack name>-<suffix>`) for
   * the cluster, VPC, security group, Auto Scaling Group and whichever load
   * balancers, listeners and logs bucket were created.
   *
   * @param props construct props (VPC and optional VPC stack name)
   */
  private addOutputs(props: SpicyEcsClusterProps): void {
    this.exportOutput({
      logicalId: 'ClusterName',
      value: this.cluster.clusterName,
      description: 'ECS Cluster Name',
      exportSuffix: 'cluster-name',
    });
    this.exportOutput({
      logicalId: 'ClusterArn',
      value: this.cluster.clusterArn,
      description: 'ECS Cluster ARN',
      exportSuffix: 'cluster-arn',
    });
    this.exportOutput({
      logicalId: 'VPC',
      value: props.vpc.vpcId,
      description: 'VPC ID',
      exportSuffix: 'VPC',
    });

    // Export VPC stack name if provided (for other stacks to import VPC details)
    if (props.vpcStackName) {
      this.exportOutput({
        logicalId: 'VPCStackName',
        value: props.vpcStackName,
        description: 'VPC Stack Name',
        exportSuffix: 'VPCStackName',
      });
    }

    this.exportOutput({
      logicalId: 'ECSHostSecurityGroupId',
      value: this.ecsHostSecurityGroup.securityGroupId,
      description: 'ECS Host Security Group',
      exportSuffix: 'ecs-host-security-group',
    });
    this.exportOutput({
      logicalId: 'AutoScalingGroupName',
      value: this.autoScalingGroup.autoScalingGroupName,
      description: 'Auto Scaling Group Name',
      exportSuffix: 'auto-scaling-group',
    });

    if (this.externalLoadBalancer) {
      this.exportOutput({
        logicalId: 'ExternalLoadBalancerDNS',
        value: this.externalLoadBalancer.loadBalancerDnsName,
        description: 'External Load Balancer DNS',
        exportSuffix: 'internet-facing-url',
      });
      this.exportOutput({
        logicalId: 'ExternalLoadBalancerArn',
        value: this.externalLoadBalancer.loadBalancerArn,
        description: 'External Load Balancer ARN',
        exportSuffix: 'internet-facing-arn',
      });
      this.exportOutput({
        logicalId: 'ExternalLoadBalancerHostedZoneId',
        value: this.externalLoadBalancer.loadBalancerCanonicalHostedZoneId,
        description: 'External Load Balancer Hosted Zone ID',
        exportSuffix: 'internet-facing-hosted-zone-id',
      });

      if (this.externalHttpListener) {
        this.exportOutput({
          logicalId: 'ExternalHTTPListenerArn',
          value: this.externalHttpListener.listenerArn,
          description: 'External HTTP Listener ARN',
          exportSuffix: 'internet-facing-http-listener',
        });
      }

      if (this.externalHttpsListener) {
        this.exportOutput({
          logicalId: 'ExternalHTTPSListenerArn',
          value: this.externalHttpsListener.listenerArn,
          description: 'External HTTPS Listener ARN',
          exportSuffix: 'internet-facing-https-listener',
        });
      }
    }

    if (this.internalLoadBalancer) {
      this.exportOutput({
        logicalId: 'InternalLoadBalancerDNS',
        value: this.internalLoadBalancer.loadBalancerDnsName,
        description: 'Internal Load Balancer DNS',
        exportSuffix: 'internal-url',
      });
      this.exportOutput({
        logicalId: 'InternalLoadBalancerArn',
        value: this.internalLoadBalancer.loadBalancerArn,
        description: 'Internal Load Balancer ARN',
        exportSuffix: 'internal-arn',
      });
      this.exportOutput({
        logicalId: 'InternalLoadBalancerHostedZoneId',
        value: this.internalLoadBalancer.loadBalancerCanonicalHostedZoneId,
        description: 'Internal Load Balancer Hosted Zone ID',
        exportSuffix: 'internal-hosted-zone-id',
      });

      if (this.internalHttpListener) {
        this.exportOutput({
          logicalId: 'InternalHTTPListenerArn',
          value: this.internalHttpListener.listenerArn,
          description: 'Internal HTTP Listener ARN',
          exportSuffix: 'internal-http-listener',
        });
      }

      if (this.internalHttpsListener) {
        this.exportOutput({
          logicalId: 'InternalHTTPSListenerArn',
          value: this.internalHttpsListener.listenerArn,
          description: 'Internal HTTPS Listener ARN',
          exportSuffix: 'internal-https-listener',
        });
      }
    }

    if (this.logsBucket) {
      this.exportOutput({
        logicalId: 'LogsBucketName',
        value: this.logsBucket.bucketName,
        description: 'ALB Logs S3 Bucket',
        exportSuffix: 'logs-s3-bucket',
      });
    }
  }

  /**
   * Create one exported CfnOutput. The construct ID is `<logicalId>Output`,
   * the CloudFormation logical ID is pinned to `logicalId`, and the export
   * name is `<stack name>-<exportSuffix>`.
   */
  private exportOutput(options: {
    logicalId: string;
    value: string;
    description: string;
    exportSuffix: string;
  }): void {
    const stack = cdk.Stack.of(this);
    const output = new cdk.CfnOutput(this, `${options.logicalId}Output`, {
      value: options.value,
      description: options.description,
      exportName: `${stack.stackName}-${options.exportSuffix}`,
    });
    output.overrideLogicalId(options.logicalId);
  }

  /** Fixed plain-text 404 response used as the default action of every listener. */
  private static notFoundAction(): elbv2.ListenerAction {
    return elbv2.ListenerAction.fixedResponse(404, {
      contentType: 'text/plain',
      messageBody: 'Not Found',
    });
  }
}
|