我有一个使用 Terraform 的环境设置。我们的 SecOps 团队之一需要安装 SonarQube CE 以进行自动扫描、嗅探、漏洞检查。所以我让它在 AWS 中运行,通过我们的 VPN,DNS 解析为一个面向内部的 ALB,该 ALB 将流量指向构成 ECS 集群的目标实例组。还有一个启动配置和自动缩放组。目前每台主机只运行一个容器,希望在某个时候将其设置为 2-4。
我遇到的问题是实例正在容器端口:9000 和动态临时端口 32768 上注册到目标组。对动态端口的运行状况检查工作正常,但是对端口 9000 的运行状况检查失败。这导致实例在初始、不健康和重复终止之间循环。除了这个烦人的问题,应用程序运行良好。RDS 连接,我们可以使用 SonarQube 就好了。
我已经尝试在 Terraform 中删除对容器端口的引用,我还要声明这是一个非常安全的环境。来自任何 VPC 的所有出口流量都通过 McAffee Cloud Proxy 应用程序过滤掉。当我第一次在沙盒帐户中站起来时,出口到 0.0.0.0/0 一切正常。我现在已经在这上面花了几个小时,到了挠头的地步。
希望其他人来过这里并分享他们的见解。毕竟明天是新的一天。帮助!
ERROR Message when I remove the port from the target group
aws_lb_target_group.ecs: port should be set when target type is instance
ERROR Message when I set the port to 0
aws_ecs_service.ecs: InvalidParameterException: The container sonarqube did not have a container port 0 defined.
ERROR Message when I set the container port to 0 in the taskdef.
aws_ecs_task_definition.task: ClientException: Invalid 'containerPort' setting for container 'sonarqube'
ecs-taskdef.tf
resource "aws_ecs_task_definition" "task" {
family = "${var.name}-${var.env}"
network_mode = "bridge"
cpu = 8192
memory = 16384
execution_role_arn = "${var.ecs-exec-role}"
container_definitions = <<DEFINITION
[
{
"name": "${var.name}",
"image":"${var.image}",
"logConfiguration": {
"logDriver": "awslogs",
"options": {
"awslogs-group": "/ecs/${var.cluster_name}-${var.name}",
"awslogs-region": "${var.region}",
"awslogs-stream-prefix": "ecs"
}
},
"portMappings": [
{
"containerPort": 9000
}
],
"environment": [
{
"name": "sonar.jdbc.password",
"value": "${var.password}"
},
{
"name": "sonar.jdbc.url",
"value": "jdbc:mysql://${var.rds_url}:${var.port}/sonar?useUnicode=true&characterEncoding=utf8&rewriteBatchedStatements=true&useConfigs=maxPerformance"
},
{
"name": "sonar.jdbc.username",
"value": "${var.username}"
}
]
}
]
DEFINITION
}
resource "aws_ecs_service" "ecs" {
name = "${var.name}-${var.env}"
cluster = "${var.cluster_name}"
task_definition = "${aws_ecs_task_definition.task.arn}"
scheduling_strategy = "DAEMON"
lifecycle {
ignore_changes = ["desired_count"]
}
load_balancer {
target_group_arn = "${aws_lb_target_group.ecs.arn}"
container_name = "${var.name}"
container_port = 9000 #Removed & Terraform complains with an error.
}
}
elb.tf
resource "aws_lb" "ecs" {
name = "${var.name_prefix}-${var.name}-tf"
internal = true
load_balancer_type = "application"
security_groups = ["${var.security_groups}"]
subnets = ["${var.subnets}"]
enable_deletion_protection = false
tags = "${merge(var.tags, map("Name", "${var.name_prefix}-${var.name}-elb"))}"
}
resource "aws_lb_listener" "ecs" {
load_balancer_arn = "${aws_lb.ecs.arn}"
port = 80
protocol = "HTTP"
default_action {
type = "redirect"
redirect {
port = "443"
protocol = "HTTPS"
status_code = "HTTP_301"
}
}
}
resource "aws_lb_listener" "ssl" {
load_balancer_arn = "${aws_lb.ecs.arn}"
port = 443
protocol = "HTTPS"
lifecycle {
create_before_destroy = true
}
ssl_policy = "ELBSecurityPolicy-2016-08"
certificate_arn = "arn:aws:acm:REDACTED"
default_action {
type = "forward"
target_group_arn = "${aws_lb_target_group.ecs.arn}"
}
}
resource "aws_lb_target_group" "ecs" {
name = "${var.cluster_name}"
protocol = "HTTP"
port = 9000 #must be here or TF errors instance type must have port
vpc_id = "${var.vpc_id}"
lifecycle {
create_before_destroy = true
}
}
ec2.tf
resource "aws_autoscaling_group" "asg" {
availability_zones = ["${var.region}a", "${var.region}b", "${var.region}d"]
name = "${var.name}-${var.env}-asg"
max_size = "${var.min_size}"
min_size = "${var.max_size}"
health_check_grace_period = 300
health_check_type = "ELB"
desired_capacity = "${var.desired_size}"
launch_configuration = "${aws_launch_configuration.alc.name}"
vpc_zone_identifier = ["${var.subnet_ids}"]
target_group_arns = ["${var.target_arn}"]
lifecycle {
create_before_destroy = true
}
tag {
key = "Environment"
value = "${var.name}"
propagate_at_launch = true
}
tag {
key = "Name"
value = "${var.name_prefix}-${var.name}.ecs"
propagate_at_launch = true
}
}
resource "aws_launch_configuration" "alc" {
name_prefix = "${var.name_prefix}.ecs"
image_id = "${lookup(var.ecs-images, var.region)}"
instance_type = "${var.instance_type}"
iam_instance_profile = "${aws_iam_instance_profile.ecs-instance-profile.arn}"
user_data = "${data.template_file.userdata.rendered}"
key_name = "${var.key_name}"
security_groups = ["${var.security_groups}"]
lifecycle {
create_before_destroy = true
}
root_block_device {
volume_type = "io1"
iops = "1000"
volume_size = "${var.volume_size}"
}
}
data "template_file" "userdata" {
template = "${file("${path.module}/userdata/ecs-instances.sh")}"
vars {
cluster-name = "${aws_ecs_cluster.cluster.name}"
}
}
resource "aws_security_group" "allow_all_from_cluster" {
name = "${var.name_prefix}-${var.name}-ecs-cluster"
description = "Allow traffic from cluster"
vpc_id = "${var.vpc_id}"
tags = "${merge(var.tags, map("Name", "${var.name_prefix}-${var.name}-sg"))}"
lifecycle {
create_before_destroy = true
}
ingress { #open to VPC IP's
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["172.27.160.0/22"]
}
ingress { #open to corp network redirected to 443
from_port = 80
to_port = 80
protocol = "tcp"
cidr_blocks = ["10.0.0.0/8"]
}
ingress { #http access for corp users
from_port = 443
to_port = 443
protocol = "tcp"
cidr_blocks = ["10.0.0.0/8"]
}
egress { #open to VPC IP's
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["172.27.160.0/22"]
}
egress { #ephemeral response to corp users
from_port = 32768
to_port = 65535
protocol = "tcp"
cidr_blocks = ["10.0.0.0/8"]
}
}
iam.tf
resource "aws_iam_role" "iam_role" {
name = "${var.name}-ecs-role"
assume_role_policy = <<EOF
{
"Version": "2012-10-17",
"Statement": [
{
"Sid": "",
"Effect": "Allow",
"Principal": {
"Service": "ecs.amazonaws.com"
},
"Action": "sts:AssumeRole"
},
{
"Sid": "",
"Effect": "Allow",
"Principal": {
"Service": "ec2.amazonaws.com"
},
"Action": "sts:AssumeRole"
}
]
}
EOF
}
resource "aws_iam_policy" "efs-policy" {
name = "${var.env}-efs-access-policy"
path = "/"
description = "Allow ${var.env} cluster access to EFS"
policy = <<EOF
{
"Version": "2012-10-17",
"Statement": [
{
"Action": [
"elasticfilesystem:*"
],
"Effect": "Allow",
"Resource": "*"
}
]
}
EOF
}
resource "aws_iam_role_policy_attachment" "ecs-service-role" {
role = "${aws_iam_role.iam_role.name}"
policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceRole"
}
resource "aws_iam_role_policy_attachment" "ecs-service-for-ec2-role" {
role = "${aws_iam_role.iam_role.name}"
policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role"
}
resource "aws_iam_role_policy_attachment" "ssm-service-role" {
role = "${aws_iam_role.iam_role.name}"
policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEC2RoleforSSM"
}
resource "aws_iam_role_policy_attachment" "efs-for-ec2-role" {
role = "${aws_iam_role.iam_role.name}"
policy_arn = "${aws_iam_policy.efs-policy.arn}"
}
resource "aws_iam_instance_profile" "ecs-instance-profile" {
name = "${var.env}-ecs"
role = "${aws_iam_role.iam_role.name}"
}
预期的运行状况检查仅在动态端口上进行。我可以从端口 9000 上的目标组中删除实例。对于两个端口,每个实例在已注册的目标部分中显示两次。我删除了端口 9000 并且实例保持在服务状态。