Skip to content
This repository was archived by the owner on Aug 9, 2023. It is now read-only.

Commit 17807d1

Browse files
authored
Merge pull request #25 from wleepang/nextflow-updates
Nextflow updates
2 parents 6670a82 + a3b13de commit 17807d1

File tree

4 files changed

+242
-8
lines changed

4 files changed

+242
-8
lines changed

src/containers/nextflow/Dockerfile

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Image for the Nextflow head node: CentOS 7 with Java 8, the AWS CLI and the
# Nextflow launcher. The container starts through the nextflow.aws.sh wrapper.
FROM centos:7 AS build

# Make every RUN pipeline fail when any stage fails, so a broken download is
# never silently piped into bash.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install OS packages and drop the yum caches in the same layer.
RUN yum update -y \
 && yum install -y \
    awscli \
    curl \
    java-1.8.0-openjdk \
 && yum clean all \
 && rm -rf /var/cache/yum

ENV JAVA_HOME=/usr/lib/jvm/jre-openjdk/

# Run the Nextflow installer and move the resulting launcher into
# /usr/local/bin within a single layer. `-f` makes curl exit non-zero on an
# HTTP error instead of feeding an error page to bash.
WORKDIR /opt/inst
RUN curl -fsSL https://get.nextflow.io | bash \
 && mv nextflow /usr/local/bin

COPY nextflow.aws.sh /opt/bin/nextflow.aws.sh
RUN chmod +x /opt/bin/nextflow.aws.sh

WORKDIR /opt/work
ENTRYPOINT ["/opt/bin/nextflow.aws.sh"]
21+
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#!/bin/bash
# Container entrypoint: writes a Nextflow config from environment variables,
# stages a workflow script from S3 into a per-job directory and runs it.
#
# Usage: nextflow.aws.sh <s3-uri-of-workflow-script>
#
# Environment variables read:
#   NF_WORKDIR             value written to workDir in the generated config
#   NF_JOB_QUEUE           value written to process.queue in the generated config
#   AWS_BATCH_JOB_ID       used to build the per-job working directory
#   AWS_BATCH_JOB_ATTEMPT  used to build the per-job working directory

# Stop at the first failing command instead of carrying on with a partially
# staged workflow.
set -e

if [ -z "$1" ]; then
    echo "usage: $0 <s3-uri-of-workflow-script>" >&2
    exit 1
fi
NEXTFLOW_SCRIPT=$1

# Create the default config using environment variables
# passed into the container
mkdir -p /opt/config
NF_CONFIG=/opt/config/nextflow.config

cat << EOF > "$NF_CONFIG"
workDir = "$NF_WORKDIR"
process.executor = "awsbatch"
process.queue = "$NF_JOB_QUEUE"
executor.awscli = "/home/ec2-user/miniconda/bin/aws"
EOF

# AWS Batch places multiple jobs on an instance
# To avoid file path clobbering use the JobID and JobAttempt
# to create a unique path
GUID="$AWS_BATCH_JOB_ID/$AWS_BATCH_JOB_ATTEMPT"

mkdir -p "/opt/work/$GUID"
cd "/opt/work/$GUID"

# stage workflow definition
aws s3 cp --no-progress "$NEXTFLOW_SCRIPT" .

# Pick the first staged *.nf file; fail clearly when nothing was staged.
NF_FILE=$(find . -name "*.nf" -print -quit)
if [ -z "$NF_FILE" ]; then
    echo "error: no .nf file found after staging $NEXTFLOW_SCRIPT" >&2
    exit 1
fi

echo "== Nextflow Configuration =="
cat "$NF_CONFIG"

echo "== Running Workflow =="
# exec replaces this shell so nextflow receives container signals directly
# and its exit status becomes the container's exit status.
exec nextflow -c "$NF_CONFIG" run "$NF_FILE"

src/templates/nextflow/nextflow-aio.template.yaml

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ Metadata:
2727
- ExistingDataBucket
2828
- KeyPairName
2929
- AvailabilityZones
30-
- NextflowContainerImage
3130
- Label:
3231
default: "AWS Batch"
3332
Parameters:
@@ -39,6 +38,7 @@ Metadata:
3938
- Label:
4039
default: "Nextflow Resources"
4140
Parameters:
41+
- NextflowContainerImage
4242
- S3NextflowBucketName
4343
- ExistingNextflowBucket
4444
- S3NextflowScriptPrefix
@@ -139,8 +139,10 @@ Parameters:
139139
NextflowContainerImage:
140140
Type: String
141141
Description: >-
142-
Container image for nextflow with custom entrypoint for workflow
143-
script staging. (Example, "<dockerhubuser>/nextflow:latest")
142+
(Optional) Container image for nextflow with custom entrypoint for config and workflow
143+
script staging. (Example, "<dockerhubuser>/nextflow:latest").
144+
Provide this if you have a specific version of nextflow you want to use, otherwise a
145+
container will be built using the latest version.
144146
145147
TemplateRootUrl:
146148
Type: String
@@ -214,6 +216,9 @@ Resources:
214216

215217

216218
Outputs:
219+
NextflowContainerImage:
220+
Value: !GetAtt NextflowStack.Outputs.NextflowContainerImage
221+
217222
NextflowJobDefinition:
218223
Value: !GetAtt NextflowStack.Outputs.NextflowJobDefinition
219224
Description: >-

src/templates/nextflow/nextflow-resources.template.yaml

Lines changed: 179 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,11 @@ Metadata:
2525
- S3DataBucketName
2626
- S3NextflowBucketName
2727
- ExistingBucket
28-
- NextflowContainerImage
28+
- BatchDefaultJobQueue
2929
- Label:
3030
default: "Optional"
3131
Parameters:
32+
- NextflowContainerImage
3233
- S3ScriptPrefix
3334
- S3WorkDirPrefix
3435

@@ -67,8 +68,10 @@ Parameters:
6768
NextflowContainerImage:
6869
Type: String
6970
Description: >-
70-
Container image for nextflow with custom entrypoint for config and workflow
71-
script staging. (Example, "<dockerhubuser>/nextflow:latest")
71+
(Optional) Container image for nextflow with custom entrypoint for config and workflow
72+
script staging. (Example, "<dockerhubuser>/nextflow:latest").
73+
Provide this if you have a specific version of nextflow you want to use, otherwise a
74+
container will be built using the latest version.
7275
7376
BatchDefaultJobQueue:
7477
Type: String
@@ -80,7 +83,12 @@ Conditions:
8083
Fn::Equals:
8184
- !Ref S3NextflowBucketName
8285
- !Ref S3DataBucketName
83-
86+
87+
# True when the NextflowContainerImage parameter is the empty string; the
# resources and Fn::If expressions that reference this condition then build
# and use an ECR-hosted image instead of a user-supplied one.
BuildNextflowContainer: !Equals [!Ref NextflowContainerImage, ""]
91+
8492
NextflowBucketDoesNotExist:
8593
Fn::Equals:
8694
- !Ref ExistingBucket
@@ -110,6 +118,70 @@ Resources:
110118
SSEAlgorithm: AES256
111119
Tags: !FindInMap ["TagMap", "default", "tags"]
112120

121+
# Service role assumed by CodeBuild. Only created when the
# BuildNextflowContainer condition holds.
IAMCodeBuildRole:
  Type: AWS::IAM::Role
  Condition: BuildNextflowContainer
  Properties:
    AssumeRolePolicyDocument:
      Version: "2012-10-17"
      Statement:
        - Effect: Allow
          Action: sts:AssumeRole
          Principal:
            Service: codebuild.amazonaws.com

    Policies:
      # ECR actions used to create the repository and push the image.
      - PolicyName: !Sub codebuild-ecr-access-${AWS::Region}
        PolicyDocument:
          Version: "2012-10-17"
          Statement:
            - Effect: Allow
              Action:
                - "ecr:CreateRepository"
                - "ecr:BatchCheckLayerAvailability"
                - "ecr:CompleteLayerUpload"
                - "ecr:GetAuthorizationToken"
                - "ecr:InitiateLayerUpload"
                - "ecr:PutImage"
                - "ecr:UploadLayerPart"
              Resource: "*"
      # CloudWatch Logs actions for writing build logs.
      - PolicyName: !Sub codebuild-logs-access-${AWS::Region}
        PolicyDocument:
          Version: "2012-10-17"
          Statement:
            - Effect: Allow
              Action:
                - logs:CreateLogGroup
                - logs:CreateLogStream
                - logs:PutLogEvents
              Resource: "*"
158+
159+
# Execution role for the Lambda function that starts and polls the CodeBuild
# build. Only created when the BuildNextflowContainer condition holds.
IAMLambdaExecutionRole:
  Type: AWS::IAM::Role
  Condition: BuildNextflowContainer
  Properties:
    Path: /
    AssumeRolePolicyDocument:
      Version: "2012-10-17"
      Statement:
        - Effect: Allow
          Action: "sts:AssumeRole"
          Principal:
            Service: lambda.amazonaws.com
    ManagedPolicyArns:
      - arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole
      - arn:aws:iam::aws:policy/service-role/AWSLambdaRole
    Policies:
      # Lets the function start a build and read back its status.
      - PolicyName: !Sub codebuild-access-${AWS::Region}
        PolicyDocument:
          Version: "2012-10-17"
          Statement:
            - Effect: Allow
              Resource: "*"
              Action:
                - "codebuild:StartBuild"
                - "codebuild:BatchGetBuilds"
184+
113185
IAMNextflowJobRole:
114186
Type: AWS::IAM::Role
115187
Properties:
@@ -152,6 +224,95 @@ Resources:
152224
ManagedPolicyArns:
153225
- "arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess"
154226

227+
# CodeBuild project that clones the workflow repository, builds the image in
# src/containers/nextflow and pushes it to the account's ECR registry as
# nextflow:latest. Only created when the BuildNextflowContainer condition holds.
CodeBuildProject:
  Type: AWS::CodeBuild::Project
  Condition: BuildNextflowContainer
  Properties:
    # Suffix the project name with the third "/"-separated segment of the
    # stack id so that each stack gets its own project name.
    Name: !Sub
      - nextflow-container-${GUID}
      - GUID: !Select [ 2, !Split [ "/", !Ref "AWS::StackId" ]]
    Description: >-
      Builds a nextflow container for running genomics workflows
    Artifacts:
      Type: NO_ARTIFACTS
    Environment:
      Type: LINUX_CONTAINER
      Image: aws/codebuild/standard:1.0
      ComputeType: BUILD_GENERAL1_SMALL
      # The buildspec below runs docker build / docker push.
      PrivilegedMode: True

    ServiceRole: !GetAtt IAMCodeBuildRole.Arn
    Source:
      Type: NO_SOURCE
      # Inline buildspec. ${AWS::Region} and ${REGISTRY} are substituted by
      # !Sub before the buildspec reaches CodeBuild. The source is cloned from
      # github.com directly rather than through a third-party proxy host.
      BuildSpec: !Sub
        - |-
          version: 0.2
          phases:
            pre_build:
              commands:
                - echo "Docker Login to ECR"
                - $(aws ecr get-login --no-include-email --region ${AWS::Region})
                - echo "Creating ECR image repository"
                - aws ecr create-repository --repository-name nextflow || true
                - echo "Getting source code from Github"
                - git clone https://github.com/aws-samples/aws-genomics-workflows.git
                - cd aws-genomics-workflows
                - cd src/containers/nextflow
            build:
              commands:
                - echo "Building container"
                - docker build -t nextflow .
            post_build:
              commands:
                - echo "Tagging container image"
                - docker tag nextflow:latest ${REGISTRY}/nextflow:latest
                - echo "Pushing container image to ECR"
                - docker push ${REGISTRY}/nextflow:latest
        - REGISTRY: !Sub ${AWS::AccountId}.dkr.ecr.${AWS::Region}.amazonaws.com

    Tags: !FindInMap ["TagMap", "default", "tags"]
274+
275+
# Custom resource backed by CodeBuildInvocationFunction. It passes the
# CodeBuild project reference to the function as the BuildProject property.
# Only created when the BuildNextflowContainer condition holds.
CodeBuildInvocation:
  Condition: BuildNextflowContainer
  Type: Custom::CodeBuildInvocation
  Properties:
    BuildProject: !Ref CodeBuildProject
    ServiceToken: !GetAtt CodeBuildInvocationFunction.Arn
281+
282+
# Lambda function behind the CodeBuildInvocation custom resource. On Create
# and Update it starts the CodeBuild project named in the BuildProject
# property, polls until the build leaves IN_PROGRESS, and reports the outcome
# to CloudFormation. Only created when the BuildNextflowContainer condition
# holds.
CodeBuildInvocationFunction:
  Type: AWS::Lambda::Function
  Condition: BuildNextflowContainer
  Properties:
    Handler: index.handler
    Role: !GetAtt IAMLambdaExecutionRole.Arn
    # python3.7 is a deprecated Lambda runtime; use a supported one.
    Runtime: python3.12
    Timeout: 600
    Code:
      ZipFile: |
        from time import sleep

        import boto3
        import cfnresponse

        # Stop polling this many milliseconds before the Lambda timeout so a
        # response can still be sent to CloudFormation.
        SAFETY_MARGIN_MS = 20000

        def handler(event, context):
            """Run the CodeBuild project and send exactly one cfnresponse.

            Create/Update requests start a build and wait for it; any other
            request type (e.g. Delete) is acknowledged with SUCCESS without
            starting a build. FAILED is sent when the build does not end in
            SUCCEEDED, when polling runs out of time, or when an exception
            is raised.
            """
            result = cfnresponse.SUCCESS
            try:
                if event['RequestType'] in ("Create", "Update"):
                    codebuild = boto3.client('codebuild')
                    build = codebuild.start_build(
                        projectName=event["ResourceProperties"]["BuildProject"]
                    )['build']

                    build_id = build['id']
                    status = build['buildStatus']
                    while (status == 'IN_PROGRESS'
                           and context.get_remaining_time_in_millis() > SAFETY_MARGIN_MS):
                        sleep(10)
                        build = codebuild.batch_get_builds(ids=[build_id])['builds'][0]
                        status = build['buildStatus']

                    # Covers failed/stopped builds and builds that were still
                    # IN_PROGRESS when the polling budget ran out.
                    if status != "SUCCEEDED":
                        print("Build %s ended polling with status %s" % (build_id, status))
                        result = cfnresponse.FAILED
            except Exception as exc:
                # Without a response CloudFormation would wait for its own
                # timeout, so report the failure explicitly.
                print("CodeBuild invocation failed: %s" % exc)
                result = cfnresponse.FAILED

            cfnresponse.send(event, context, result, None)
315+
155316
BatchNextflowJobDefinition:
156317
Type: AWS::Batch::JobDefinition
157318
Properties:
@@ -178,7 +339,11 @@ Resources:
178339
Memory: 1024
179340
JobRoleArn: !GetAtt IAMNextflowJobRole.Arn
180341
Vcpus: 2
181-
Image: !Ref NextflowContainerImage
342+
Image:
343+
Fn::If:
344+
- BuildNextflowContainer
345+
- !Sub ${AWS::AccountId}.dkr.ecr.${AWS::Region}.amazonaws.com/nextflow:latest
346+
- !Ref NextflowContainerImage
182347
Environment:
183348
- Name: "NF_JOB_QUEUE"
184349
Value: !Ref BatchDefaultJobQueue
@@ -218,6 +383,15 @@ Outputs:
218383
the bucket.
219384
Value: !Ref S3WorkDirPrefix
220385

386+
# Image URI in use: the ECR-hosted nextflow:latest when the
# BuildNextflowContainer condition holds, otherwise the value of the
# NextflowContainerImage parameter.
NextflowContainerImage:
  Description: The nextflow container used.
  Value: !If
    - BuildNextflowContainer
    - !Sub ${AWS::AccountId}.dkr.ecr.${AWS::Region}.amazonaws.com/nextflow:latest
    - !Ref NextflowContainerImage
394+
221395
NextflowJobDefinition:
222396
Description: >-
223397
Batch Job Definition that creates a nextflow head node for running workflows

0 commit comments

Comments
 (0)