创建历史服务器的默认 CF 模板包括创建安全组和 IAM 角色。我删除了两者并添加以选择现有的安全组。现在,当我运行我的 CF 模板时,它成功创建了 HistoryServerInstance,但在等待条件下失败了。你们能帮我解决我哪里出错了。附上错误截图和脚本。
谢谢。
我在 yaml 中的 CF 模板:
Parameters:
InstanceType:
Type: String
Default: t3.medium
AllowedValues:
- t3.micro
- t3.small
- t3.medium
Description: Instance Type for EC2 instance which hosts Spark history server.
LatestAmiId:
Type: AWS::SSM::Parameter::Value<AWS::EC2::Image::Id>
Description: Latest AMI ID of Amazon Linux 2 for Spark history server instance. You can use the default value.
Default: /aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-gp2
InstanceSecurityGroup:
Description: "Select Security Group"
Type: AWS::EC2::SecurityGroup::Id
VpcId:
Type: AWS::EC2::VPC::Id
Description: "VPC ID for Spark history server instance."
Default: ''
SubnetId:
Type: AWS::EC2::Subnet::Id
Description: Subnet ID for Spark history server instance.
Default: ''
IpAddressRange:
Type: String
Description: "IP address range that can be used to view the Spark UI."
MinLength: 9
MaxLength: 18
HistoryServerPort:
Type: Number
Description: History Server Port for the Spark UI.
Default: 18080
MinValue: 1150
MaxValue: 65535
EventLogDir:
Type: String
Description: "*Event Log Directory* where Spark event logs are stored from the Glue job or dev endpoints. You must use s3a:// for the event logs path scheme"
Default: s3a://hcg-stagingaas6377-sandbox/logs/
SparkPackageLocation:
Type: String
Description: You can use the default value.
Default: 'https://archive.apache.org/dist/spark/spark-2.4.3/spark-2.4.3-bin-without-hadoop.tgz'
KeystorePath:
Type: String
Description: SSL/TLS keystore path for HTTPS. If you want to use custom keystore file, you can specify the S3 path s3://path_to_your_keystore_file here. If you leave this parameter empty, self-signed certificate based keystore is used.
KeystorePassword:
Type: String
NoEcho: true
Description: SSL/TLS keystore password for HTTPS. A valid password can contain 6 to 30 characters.
MinLength: 6
MaxLength: 30
Metadata:
AWS::CloudFormation::Interface:
ParameterGroups:
-
Label:
default: Spark UI Configuration
Parameters:
- IpAddressRange
- HistoryServerPort
- EventLogDir
- SparkPackageLocation
- KeystorePath
- KeystorePassword
-
Label:
default: EC2 Instance Configuration
Parameters:
- InstanceType
- LatestAmiId
- VpcId
- SubnetId
Mappings:
MemoryBasedOnInstanceType:
t3.micro:
SparkDaemonMemory: '512m'
t3.small:
SparkDaemonMemory: '1g'
t3.medium:
SparkDaemonMemory: '3g'
Resources:
HistoryServerInstance:
Type: AWS::EC2::Instance
Properties:
ImageId: !Ref LatestAmiId
InstanceType: !Ref InstanceType
SubnetId: !Ref SubnetId
SecurityGroupIds:
- !Ref InstanceSecurityGroup
UserData:
'Fn::Base64': !Sub |
#!/bin/bash -xe
yum update -y aws-cfn-bootstrap
echo "CA_OVERRIDE=/etc/pki/tls/certs/ca-bundle.crt" >> /etc/environment
export CA_OVERRIDE=/etc/pki/tls/certs/ca-bundle.crt
rpm -Uvh https://s3.amazonaws.com/amazoncloudwatch-agent/amazon_linux/amd64/latest/amazon-cloudwatch-agent.rpm
/opt/aws/bin/cfn-init -v -s ${AWS::StackName} -r HistoryServerInstance --region ${AWS::Region}
/opt/aws/bin/cfn-signal -e -s ${AWS::StackName} -r HistoryServerInstance --region ${AWS::Region}
Metadata:
AWS::CloudFormation::Init:
configSets:
default:
- cloudwatch_agent_configure
- cloudwatch_agent_restart
- spark_download
- spark_init
- spark_configure
- spark_hs_start
- spark_hs_test
cloudwatch_agent_configure:
files:
/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json:
content: !Sub |
{
"logs": {
"logs_collected": {
"files": {
"collect_list": [
{
"file_path": "/var/log/cfn-init.log",
"log_group_name": "/aws-glue/sparkui_cfn/cfn-init.log"
},
{
"file_path": "/opt/spark/logs/spark-*",
"log_group_name": "/aws-glue/sparkui_cfn/spark_history_server.log"
}
]
}
}
}
}
cloudwatch_agent_restart:
commands:
01_stop_service:
command: /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a stop
02_start_service:
command: /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -c file:/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json -s
spark_download:
packages:
yum:
java-1.8.0-openjdk: []
maven: []
python3: []
python3-pip: []
sources:
/opt: !Ref SparkPackageLocation
commands:
create-symlink:
command: ln -s /opt/spark-* /opt/spark
export:
command: !Sub |
echo "export JAVA_HOME=/usr/lib/jvm/jre" | sudo tee -a /etc/profile.d/jdk.sh
echo "export SPARK_HOME=/opt/spark" | sudo tee -a /etc/profile.d/spark.sh
export JAVA_HOME=/usr/lib/jvm/jre
export SPARK_HOME=/opt/spark
download-pom-xml:
command: curl -o /tmp/pom.xml https://aws-glue-sparkui-prod-us-east-1.s3.amazonaws.com/public/mvn/pom.xml
download-setup-py:
command: curl -o /tmp/setup.py https://aws-glue-sparkui-prod-us-east-1.s3.amazonaws.com/public/misc/setup.py
download-systemd-file:
command: curl -o /usr/lib/systemd/system/spark-history-server.service https://aws-glue-sparkui-prod-us-east-1.s3.amazonaws.com/public/misc/spark-history-server.service
spark_init:
commands:
download-mvn-dependencies:
command: cd /tmp; mvn dependency:copy-dependencies -DoutputDirectory=/opt/spark/jars/
install-boto:
command: pip3 install boto --user; pip3 install boto3 --user
files:
/opt/spark/conf/spark-defaults.conf:
content: !Sub |
spark.eventLog.enabled true
spark.history.fs.logDirectory ${EventLogDir}
spark.history.ui.port 0
spark.ssl.historyServer.enabled true
spark.ssl.historyServer.port ${HistoryServerPort}
spark.ssl.historyServer.keyStorePassword ${KeystorePassword}
group: ec2-user
mode: '000644'
owner: ec2-user
/opt/spark/conf/spark-env.sh:
content: !Sub
- |
export SPARK_DAEMON_MEMORY=${SparkDaemonMemoryConfig}
export SPARK_HISTORY_OPTS="$SPARK_HISTORY_OPTS -Dspark.hadoop.fs.s3.impl=org.apache.hadoop.fs.s3a.S3AFileSystem"
- SparkDaemonMemoryConfig: !FindInMap [ MemoryBasedOnInstanceType, !Ref InstanceType, SparkDaemonMemory ]
group: ec2-user
mode: '000644'
owner: ec2-user
spark_configure:
commands:
create-symlink:
command: ln -s /usr/lib/systemd/system/spark-history-server.service /etc/systemd/system/multi-user.target.wants/
enable-spark-hs:
command: systemctl enable spark-history-server
configure-keystore:
command: !Sub |
python3 /tmp/setup.py --keystore "${KeystorePath}" --keystorepw "${KeystorePassword}" > /tmp/setup_py.log 2>&1
spark_hs_start:
commands:
start_spark_hs_server:
command: systemctl start spark-history-server
spark_hs_test:
commands:
check-spark-hs-server:
command: !Sub |
curl --retry 60 --retry-delay 10 --retry-max-time 600 --retry-connrefused https://localhost:${HistoryServerPort} --insecure;
/opt/aws/bin/cfn-signal -e $? "${WaitHandle}"
WaitHandle:
Type: AWS::CloudFormation::WaitConditionHandle
WaitCondition:
Type: AWS::CloudFormation::WaitCondition
DependsOn: HistoryServerInstance
Properties:
Handle: !Ref WaitHandle
Timeout: 1200
Outputs:
SparkUiPublicUrl:
Description: The Public URL of Spark UI
Value: !Join
- ''
- - 'https://'
- !GetAtt 'HistoryServerInstance.PublicDnsName'
- ':'
- !Ref HistoryServerPort
SparkUiPrivateUrl:
Description: The Private URL of Spark UI
Value: !Join
- ''
- - 'https://'
- !GetAtt 'HistoryServerInstance.PrivateDnsName'
- ':'
- !Ref HistoryServerPort
CloudWatchLogsCfnInit:
Description: CloudWatch Logs Console URL for cfn-init.log in History Server Instance
Value: !Join
- ''
- - 'https://console.aws.amazon.com/cloudwatch/home?region='
- !Ref AWS::Region
- '#logEventViewer:group=/aws-glue/sparkui_cfn/cfn-init.log;stream='
- !Ref HistoryServerInstance
CloudWatchLogsSparkHistoryServer:
Description: CloudWatch Logs Console URL for spark history server logs in History Server Instance
Value: !Join
- ''
- - 'https://console.aws.amazon.com/cloudwatch/home?region='
- !Ref AWS::Region
- '#logEventViewer:group=/aws-glue/sparkui_cfn/spark_history_server.log;stream='
- !Ref HistoryServerInstance