Skip to content

Commit d3f0bcf

Browse files
committed
Fix WarmPool usage in the EC2 Auto Scaling Group
1 parent 6410042 commit d3f0bcf

2 files changed

Lines changed: 126 additions & 13 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
## [Unreleased]
44
### Fixed
55
- Added `should-wait = true` to the EC2 instance configuration so that an instance doesn't register itself in the ECS cluster while it is in the warm pool.
6+
- Fixed WarmPool usage in the EC2 Auto Scaling Group.
67

78
## [0.2.1] - 2024-02-28
89
### Fixed

template.py

Lines changed: 125 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
import troposphere.applicationautoscaling as applicationautoscaling
1818
import troposphere.cloudwatch as cloudwatch
1919
import troposphere.cloudfront as cloudfront
20+
import troposphere.awslambda as aws_lambda
21+
import troposphere.cloudformation as cloudformation
2022

2123
import awacs.aws as aws
2224
import awacs.sts as actions_sts
@@ -26,6 +28,7 @@
2628
import awacs.cloudwatch as actions_cloudwatch
2729
import awacs.ssm as actions_ssm
2830
import awacs.kms as actions_kms
31+
import awacs.autoscaling as actions_autoscaling
2932
import awacs.aws_marketplace as actions_marketplace
3033

3134
cli_parser = argparse.ArgumentParser(description="imgproxy CloudFormation template generator")
@@ -819,18 +822,22 @@ def __init__(self, value_one: object, value_two: object) -> None:
819822
),
820823
),
821824
UpdatePolicy=policies.UpdatePolicy(
822-
AutoScalingRollingUpdate=policies.AutoScalingRollingUpdate(
823-
MinInstancesInService=1,
824-
MaxBatchSize=1,
825-
PauseTime="PT15M",
826-
SuspendProcesses=[
827-
"HealthCheck",
828-
"ReplaceUnhealthy",
829-
"AZRebalance",
830-
"AlarmNotification",
831-
"ScheduledActions",
832-
],
833-
WaitOnResourceSignals=True,
825+
AutoScalingRollingUpdate=If(
826+
cluster_should_add_warm_pool,
827+
NoValue,
828+
policies.AutoScalingRollingUpdate(
829+
MinInstancesInService=1,
830+
MaxBatchSize=1,
831+
PauseTime="PT15M",
832+
SuspendProcesses=[
833+
"HealthCheck",
834+
"ReplaceUnhealthy",
835+
"AZRebalance",
836+
"AlarmNotification",
837+
"ScheduledActions",
838+
],
839+
WaitOnResourceSignals=True,
840+
),
834841
),
835842
),
836843
))
@@ -839,8 +846,12 @@ def __init__(self, value_one: object, value_two: object) -> None:
839846
"EC2AutoScalingGroupWarmPool",
840847
Condition=cluster_should_add_warm_pool,
841848
AutoScalingGroupName=Ref(ec2_autoscaling_group),
849+
# ReuseOnScaleIn should be disabled.
850+
# If an instance is returned to the warm pool and then reused, its status
851+
# will still be "Draining" in ECS and it will not be able to accept new tasks.
852+
# See https://docs.aws.amazon.com/AmazonECS/latest/developerguide/using-warm-pool.html
842853
InstanceReusePolicy=autoscaling.InstanceReusePolicy(
843-
ReuseOnScaleIn=True,
854+
ReuseOnScaleIn=False,
844855
),
845856
))
846857

@@ -884,6 +895,107 @@ def __init__(self, value_one: object, value_two: object) -> None:
884895
],
885896
))
886897

898+
# ==============================================================================
899+
# EC2 AUTOSCALING GROUP INSTANCE REFRESHER
900+
# ==============================================================================
901+
902+
if not args.no_cluster and args.launch_type == "ec2":
903+
instance_refresher_role = template.add_resource(iam.Role(
904+
"InstanceRefresherLambdaRole",
905+
Condition=cluster_should_add_warm_pool,
906+
RoleName=Join("-", [StackName, "instance-refresher"]),
907+
Path="/",
908+
AssumeRolePolicyDocument=aws.PolicyDocument(
909+
Version="2012-10-17",
910+
Statement=[aws.Statement(
911+
Effect=aws.Allow,
912+
Action=[actions_sts.AssumeRole],
913+
Principal=aws.Principal("Service", ["lambda.amazonaws.com"]),
914+
)],
915+
),
916+
ManagedPolicyArns=[
917+
"arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole",
918+
],
919+
Policies=[
920+
iam.Policy(
921+
PolicyName="autoscaling-start-instance-refresh",
922+
PolicyDocument=aws.PolicyDocument(
923+
Version="2012-10-17",
924+
Statement=[aws.Statement(
925+
Effect=aws.Allow,
926+
Action=[
927+
actions_autoscaling.StartInstanceRefresh,
928+
],
929+
Resource=["*"],
930+
)],
931+
),
932+
),
933+
],
934+
))
935+
936+
instance_refresher_lambda = template.add_resource(aws_lambda.Function(
937+
"InstanceRefresherLambda",
938+
Condition=cluster_should_add_warm_pool,
939+
FunctionName=Join("-", [StackName, "instance-refresher"]),
940+
Runtime="python3.12",
941+
Handler="index.handler",
942+
Role=GetAtt(instance_refresher_role, "Arn"),
943+
Timeout=30,
944+
Code=aws_lambda.Code(
945+
ZipFile="""
946+
import cfnresponse
947+
import json
948+
import boto3
949+
950+
client = boto3.client('autoscaling')
951+
952+
def handler(event, context):
    """CloudFormation custom resource handler that starts an ASG instance refresh.

    On 'Create' and 'Update' requests it calls start_instance_refresh for the
    group named in event['ResourceProperties']['AutoScalingGroupName'] and
    returns the resulting 'InstanceRefreshId' in the response data. Any other
    request type is acknowledged with SUCCESS without calling Auto Scaling.

    The outcome is always reported through cfnresponse.send with the fixed
    physical resource id 'InstanceRefresher'. On error the status is FAILED
    and the message is returned under the 'exception' key.
    """
    response_data = {}
    status = cfnresponse.SUCCESS

    try:
        if event['RequestType'] in ('Create', 'Update'):
            response = client.start_instance_refresh(
                AutoScalingGroupName=event['ResourceProperties']['AutoScalingGroupName'],
                Preferences={
                    'MinHealthyPercentage': 100,
                    'MaxHealthyPercentage': 200,
                    'SkipMatching': True,
                    'ScaleInProtectedInstances': 'Ignore',
                    'StandbyInstances': 'Ignore'
                }
            )
            response_data['InstanceRefreshId'] = response['InstanceRefreshId']
    except Exception as e:
        # Write the failure to the function log as well, so the cause can be
        # found there and not only in the response payload.
        print(f'Instance refresh failed: {e!r}')
        status = cfnresponse.FAILED
        response_data['exception'] = str(e)

    # Single exit point: exactly one response is sent per invocation.
    cfnresponse.send(event, context, status, response_data, 'InstanceRefresher')
974+
""".strip(),
975+
),
976+
))
977+
978+
# Custom resource wrapper emitted with the type "Custom::InstanceRefresher".
# NOTE(review): the class name says "PlacementGroup" while resource_type is
# "Custom::InstanceRefresher" -- looks like a copy-paste leftover; consider
# renaming the class together with its usage.
class CustomPlacementGroup(cloudformation.AWSCustomObject):
979+
resource_type = "Custom::InstanceRefresher"
980+
# Declared properties; each entry is a (type, flag) pair -- presumably
# troposphere's (expected type, required) convention, TODO confirm.
props = {
981+
"ServiceToken": (str, True),
982+
"ServiceTimeout": (str, True),
983+
"AutoScalingGroupName": (str, True),
984+
"LaunchTemplate": (str, True),
985+
"LaunchTemplateVersion": (str, True),
986+
}
987+
988+
template.add_resource(CustomPlacementGroup(
989+
"EC2InstanceRefresher",
990+
Condition=cluster_should_add_warm_pool,
991+
ServiceToken=GetAtt(instance_refresher_lambda, "Arn"),
992+
ServiceTimeout="60",
993+
AutoScalingGroupName=Ref(ec2_autoscaling_group),
994+
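# The Lambda handler reads only AutoScalingGroupName. The launch template
# properties below are passed solely so that a launch template change updates
# this resource, which in turn triggers a new instance refresh.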
995+
LaunchTemplate=Ref(ec2_launch_template),
996+
LaunchTemplateVersion=GetAtt(ec2_launch_template, "LatestVersionNumber"),
997+
))
998+
887999
# ==============================================================================
8881000
# ECS TASK DEFINITION
8891001
# ==============================================================================

0 commit comments

Comments
 (0)