This was originally raised against terraform-providers/terraform-provider-aws, but provider issues do not seem to get much attention there (I understand the Terraform team is small and cannot get to every issue, especially provider ones). For us, however, this is a blocking problem with the AWS provider, and it is easy to reproduce.
Below is our test configuration:
variable "domain_name" {
default = "mytest.com"
}
variable "ssl_policy" {
default = "ELBSecurityPolicy-2016-08"
}
data "aws_acm_certificate" "mytest_certificate" {
domain = "*.${var.domain_name}"
}
resource "aws_alb" "alb" {
name = "khiem-test-alb"
internal = false
security_groups = ["sg-35482152"]
subnets = ["subnet-04c29a60", "subnet-d05915a6"]
lifecycle {
create_before_destroy = true
}
}
resource "aws_alb_target_group" "author_target_group" {
name = "khiem-author-target-group"
port = 8080
protocol = "HTTP"
vpc_id = "vpc-32c75856"
health_check = {
protocol = "HTTP"
path = "/.healthcheck/"
port = 8080
healthy_threshold = 5
unhealthy_threshold = 2
timeout = 5
interval = 30
matcher = "200"
}
lifecycle {
create_before_destroy = true
}
}
resource "aws_alb_target_group_attachment" "author_target_group_att" {
target_group_arn = "${aws_alb_target_group.author_target_group.arn}"
target_id = "i-0b305d179d6aacf57"
port = 8080
lifecycle {
create_before_destroy = true
}
}
resource "aws_alb_target_group" "public_target_group" {
name = "khiem-public-target-group"
port = 8080
protocol = "HTTP"
vpc_id = "vpc-32c75856"
health_check = {
protocol = "HTTP"
path = "/.healthcheck/"
port = 8080
healthy_threshold = 5
unhealthy_threshold = 2
timeout = 5
interval = 30
matcher = "200"
}
lifecycle {
create_before_destroy = true
}
}
resource "aws_alb_target_group_attachment" "public_target_group_att" {
target_group_arn = "${aws_alb_target_group.public_target_group.arn}"
target_id = "i-0b305d179d6aacf57"
port = 8080
lifecycle {
create_before_destroy = true
}
}
# http listener
resource "aws_alb_listener" "alb_http_listener" {
load_balancer_arn = "${aws_alb.alb.arn}"
port = "80"
protocol = "HTTP"
default_action {
target_group_arn = "${aws_alb_target_group.public_target_group.arn}"
type = "forward"
}
lifecycle {
create_before_destroy = true
}
}
# http listener rules
resource "aws_alb_listener_rule" "alb_http_public_rule" {
listener_arn = "${aws_alb_listener.alb_http_listener.arn}"
priority = 100
action {
type = "forward"
target_group_arn = "${aws_alb_target_group.public_target_group.arn}"
}
condition {
field = "host-header"
values = ["public-khiem.${var.domain_name}"]
}
lifecycle {
create_before_destroy = true
}
}
resource "aws_alb_listener_rule" "alb_http_author_rule" {
listener_arn = "${aws_alb_listener.alb_http_listener.arn}"
priority = 99
action {
type = "forward"
target_group_arn = "${aws_alb_target_group.author_target_group.arn}"
}
condition {
field = "host-header"
values = ["author-khiem.${var.domain_name}"]
}
lifecycle {
create_before_destroy = true
}
}
# https listener
resource "aws_alb_listener" "alb_https_listener" {
load_balancer_arn = "${aws_alb.alb.arn}"
port = "443"
protocol = "HTTPS"
ssl_policy = "${var.ssl_policy}"
certificate_arn = "${data.aws_acm_certificate.mytest_certificate.arn}"
default_action {
target_group_arn = "${aws_alb_target_group.public_target_group.arn}"
type = "forward"
}
lifecycle {
create_before_destroy = true
}
}
# https listener rules
resource "aws_alb_listener_rule" "alb_https_public_rule" {
listener_arn = "${aws_alb_listener.alb_https_listener.arn}"
priority = 100
action {
type = "forward"
target_group_arn = "${aws_alb_target_group.public_target_group.arn}"
}
condition {
field = "host-header"
values = ["public-khiem.${var.domain_name}"]
}
lifecycle {
create_before_destroy = true
}
}
resource "aws_alb_listener_rule" "alb_https_author_rule" {
listener_arn = "${aws_alb_listener.alb_https_listener.arn}"
priority = 99
action {
type = "forward"
target_group_arn = "${aws_alb_target_group.author_target_group.arn}"
}
condition {
field = "host-header"
values = ["author-khiem.${var.domain_name}"]
}
lifecycle {
create_before_destroy = true
}
}
Basically, the configuration just creates an Application Load Balancer, two target groups, and HTTP/HTTPS listeners whose rules route requests to each target group based on the host header.
This simple setup should work reliably (and used to). Just recently we found it has become unstable on both create and destroy: the HTTP or HTTPS listener resource is somehow not recorded correctly in the Terraform state, which in turn breaks the resources that depend on it (such as aws_alb_listener_rule). Below is the error on create:
Error applying plan:
2 error(s) occurred:
* aws_alb_listener_rule.alb_http_public_rule: Resource 'aws_alb_listener.alb_http_listener' does not have attribute 'arn' for variable 'aws_alb_listener.alb_http_listener.arn'
* aws_alb_listener_rule.alb_http_author_rule: Resource 'aws_alb_listener.alb_http_listener' does not have attribute 'arn' for variable 'aws_alb_listener.alb_http_listener.arn'
Terraform does not automatically rollback in the face of errors.
Instead, your Terraform state file has been partially updated with
any resources that successfully completed. Please address the error
above and apply again to incrementally change your infrastructure.
This does not happen every time, but it has been happening more and more often recently. It is easier to reproduce by chaining a series of commands, for example:
terraform apply && terraform destroy -force && terraform apply && terraform destroy -force
We tested with both Terraform 0.9.8 and 0.10.7 and hit the same instability. If we run the same command again after the error, it mostly works, but this is still a blocker for our automation process.
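As a stopgap in automation, since a second apply against the partially-updated state usually succeeds, a simple retry wrapper along these lines is one option (the retry count of 2 is our own arbitrary choice, not something validated beyond the observation above):

#!/usr/bin/env bash
# Workaround sketch: re-run apply when the first attempt fails, since
# the partially-updated state typically converges on the next run.
set -u
retries=2
for attempt in $(seq 1 "$retries"); do
  terraform apply && exit 0
  echo "apply attempt $attempt failed; retrying..." >&2
done
echo "terraform apply still failing after $retries attempts" >&2
exit 1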