Skip to content

Commit f651cc4

Browse files
Merge pull request #47 from aws-solutions-library-samples/o11n-refactor
[Break Glass] Change CW agent and Fluent Bit to Cloudwatch Observability
2 parents 83054fb + 1002ba4 commit f651cc4

File tree

7 files changed

+173
-84
lines changed

7 files changed

+173
-84
lines changed

.github/workflows/pr-triage.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ jobs:
1010
permissions:
1111
pull-requests: write
1212
steps:
13-
- uses: actions/checkout@v3
13+
- uses: actions/checkout@v4
1414
- name: Get PR author
1515
id: get-author
1616
run: |
@@ -30,7 +30,7 @@ jobs:
3030
- name: Auto-approve if author is able to write and contains only doc change
3131
id: doc-change
3232
if: steps.author-permission.outputs.require-result == 'true'
33-
uses: actions/github-script@v6
33+
uses: actions/github-script@v7
3434
with:
3535
github-token: ${{ secrets.GITHUB_TOKEN }}
3636
script: |

CODEOWNERS

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1-
CODEOWNERS @aws-solutions-library-samples/maintainers
1+
* @yubingjiaocn @bnusunny @github-actions
2+
CODEOWNERS @aws-solutions-library-samples/maintainers @yubingjiaocn
23
/.github/workflows/maintainer_workflows.yml @aws-solutions-library-samples/maintainers
34
/.github/solutionid_validator.sh @aws-solutions-library-samples/maintainers

lib/addons/s3CSIDriver.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import * as blueprints from '@aws-quickstart/eks-blueprints';
2-
import { ManagedPolicy } from 'aws-cdk-lib/aws-iam';
2+
import * as eks from 'aws-cdk-lib/aws-eks';
33
import * as iam from 'aws-cdk-lib/aws-iam';
44
import { Construct } from "constructs";
55

@@ -12,7 +12,7 @@ export const defaultProps: blueprints.addons.HelmAddOnProps & s3CSIDriverAddOnPr
1212
name: 's3CSIDriverAddOn',
1313
namespace: 'kube-system',
1414
release: 's3-csi-driver-release',
15-
version: 'v1.7.0',
15+
version: 'v1.10.0',
1616
repository: 'https://awslabs.github.io/mountpoint-s3-csi-driver',
1717
s3BucketArn: ''
1818
}
@@ -31,6 +31,7 @@ export class s3CSIDriverAddOn extends blueprints.addons.HelmAddOn {
3131
const serviceAccount = cluster.addServiceAccount('s3-csi-driver-sa', {
3232
name: 's3-csi-driver-sa',
3333
namespace: this.options.namespace,
34+
identityType: eks.IdentityType.POD_IDENTITY
3435
});
3536

3637
// new IAM policy to grant access to S3 bucket

lib/dataPlane.ts

Lines changed: 30 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -48,49 +48,33 @@ export default class DataPlaneStack {
4848
irsaRoles: ["CloudWatchFullAccess", "AmazonSQSFullAccess"]
4949
};
5050

51-
const CloudWatchLogsWritePolicy = new iam.PolicyStatement({
52-
actions: [
53-
"logs:CreateLogGroup",
54-
"logs:CreateLogStream",
55-
"logs:DescribeLogStreams",
56-
"logs:PutLogEvents",
57-
"logs:GetLogEvents"
58-
],
59-
resources: ["*"],
60-
})
61-
62-
const awsForFluentBitParams: blueprints.AwsForFluentBitAddOnProps = {
63-
iamPolicies: [CloudWatchLogsWritePolicy],
64-
namespace: "amazon-cloudwatch",
65-
values: {
66-
cloudWatchLogs: {
67-
region: cdk.Aws.REGION,
68-
logRetentionDays: 7
69-
},
70-
tolerations: [{
71-
"operator": "Exists",
72-
"effect": "NoSchedule"
73-
}]
74-
},
75-
createNamespace: true
76-
}
77-
78-
const containerInsightsParams: blueprints.ContainerInsightAddonProps = {
79-
values: {
80-
adotCollector: {
81-
daemonSet: {
82-
tolerations: [{
83-
"operator": "Exists",
84-
"effect": "NoSchedule"
85-
}],
86-
cwreceivers: {
87-
preferFullPodName: "true",
88-
addFullPodNameMetricLabel: "true"
51+
const cloudWatchInsightsParams: blueprints.CloudWatchInsightsAddOnProps = {
52+
configurationValues: {
53+
tolerations: [
54+
{
55+
key: "runtime",
56+
operator: "Exists",
57+
effect: "NoSchedule"
58+
},
59+
{
60+
key: "nvidia.com/gpu",
61+
operator: "Exists",
62+
effect: "NoSchedule"
63+
}
64+
],
65+
containerLogs: {
66+
enabled: true,
67+
fluentBit: {
68+
config: {
69+
service: "[SERVICE]\n Flush 5\n Grace 30\n Log_Level info",
70+
extraFiles: {
71+
"application-log.conf": "[INPUT]\n Name tail\n Tag kube.*\n Path /var/log/containers/*.log\n Parser docker\n DB /var/log/flb_kube.db\n Mem_Buf_Limit 5MB\n Skip_Long_Lines On\n Refresh_Interval 10\n\n[FILTER]\n Name kubernetes\n Match kube.*\n Kube_URL https://kubernetes.default.svc:443\n Kube_CA_File /var/run/secrets/kubernetes.io/serviceaccount/ca.crt\n Kube_Token_File /var/run/secrets/kubernetes.io/serviceaccount/token\n Kube_Tag_Prefix kube.var.log.containers.\n Merge_Log On\n Merge_Log_Key log_processed\n K8S-Logging.Parser On\n K8S-Logging.Exclude On\n\n[FILTER]\n Name grep\n Match kube.*\n Exclude $kubernetes['namespace_name'] kube-system\n\n[OUTPUT]\n Name cloudwatch\n Match kube.*\n region ${AWS_REGION}\n log_group_name /aws/containerinsights/${CLUSTER_NAME}/application\n log_stream_prefix ${HOST_NAME}-\n auto_create_group true\n retention_in_days 7"
72+
}
8973
}
9074
}
9175
}
9276
}
93-
}
77+
};
9478

9579
const SharedComponentAddOnParams: SharedComponentAddOnProps = {
9680
inputSns: blueprints.getNamedResource("inputSNSTopic"),
@@ -116,12 +100,10 @@ export default class DataPlaneStack {
116100
new blueprints.addons.AwsLoadBalancerControllerAddOn(),
117101
new blueprints.addons.KarpenterAddOn({ interruptionHandling: true }),
118102
new blueprints.addons.KedaAddOn(kedaParams),
119-
new blueprints.addons.ContainerInsightsAddOn(containerInsightsParams),
120-
new blueprints.addons.AwsForFluentBitAddOn(awsForFluentBitParams),
103+
new blueprints.addons.CloudWatchInsights(cloudWatchInsightsParams),
121104
new s3CSIDriverAddOn(s3CSIDriverAddOnParams),
122105
new SharedComponentAddOn(SharedComponentAddOnParams),
123106
new EbsThroughputTunerAddOn(EbsThroughputModifyAddOnParams),
124-
new dcgmExporterAddOn({})
125107
];
126108

127109
// Generate SD Runtime Addon for runtime
@@ -160,9 +142,9 @@ const MngProps: blueprints.MngClusterProviderProps = {
160142
minSize: 2,
161143
maxSize: 2,
162144
desiredSize: 2,
163-
version: eks.KubernetesVersion.V1_29,
164-
instanceTypes: [new ec2.InstanceType('m5.large')],
165-
amiType: eks.NodegroupAmiType.AL2_X86_64,
145+
version: eks.KubernetesVersion.V1_31,
146+
instanceTypes: [new ec2.InstanceType('m7g.large')],
147+
amiType: eks.NodegroupAmiType.AL2023_ARM_64_STANDARD,
166148
enableSsmPermissions: true,
167149
nodeGroupTags: {
168150
"Name": cdk.Aws.STACK_NAME + "-ClusterComponents",
@@ -172,7 +154,7 @@ const MngProps: blueprints.MngClusterProviderProps = {
172154

173155
// Deploy EKS cluster with all add-ons
174156
const blueprint = blueprints.EksBlueprint.builder()
175-
.version(eks.KubernetesVersion.V1_29)
157+
.version(eks.KubernetesVersion.V1_31)
176158
.addOns(...addOns)
177159
.resourceProvider(
178160
blueprints.GlobalResources.Vpc,
@@ -185,7 +167,7 @@ const blueprint = blueprints.EksBlueprint.builder()
185167
.resourceProvider("s3GWEndpoint", new s3GWEndpointProvider("s3GWEndpoint"))
186168
.clusterProvider(new blueprints.MngClusterProvider(MngProps))
187169
.build(scope, id + 'Stack', props);
188-
170+
/*
189171
// Workaround for permission denied when creating cluster
190172
const handler = blueprint.node.tryFindChild('@aws-cdk--aws-eks.KubectlProvider')!
191173
.node.tryFindChild('Handler')! as cdk.aws_lambda.Function
@@ -202,7 +184,7 @@ const blueprint = blueprints.EksBlueprint.builder()
202184
actions: ["lambda:GetFunctionConfiguration"],
203185
resources: [handler.functionArn]
204186
}))
205-
187+
*/
206188
// Provide static output name for cluster
207189
const cluster = blueprint.getClusterInfo().cluster
208190
const clusterNameCfnOutput = cluster.node.findChild('ClusterName') as cdk.CfnOutput;

lib/resourceProvider/vpc.ts

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
import { Tags } from 'aws-cdk-lib';
2+
import * as ec2 from 'aws-cdk-lib/aws-ec2';
3+
import { ISubnet, PrivateSubnet } from 'aws-cdk-lib/aws-ec2';
4+
import * as blueprints from '@aws-quickstart/eks-blueprints';
5+
6+
/**
 * Optional CIDR settings consumed by the VPC resource provider.
 */
interface VpcProps {
    /** Primary CIDR range used when a new VPC is created. */
    primaryCidr?: string;
    /** Secondary CIDR block to associate with the VPC. */
    secondaryCidr?: string;
    /** CIDR ranges for the secondary private subnets, matched to availability zones by index. */
    secondarySubnetCidrs?: string[];
}
14+
15+
/**
 * VPC resource provider.
 *
 * Resolves the VPC used by the blueprint: an existing VPC is looked up when a
 * `vpcId` is supplied, otherwise a new `ec2.Vpc` is created. When a secondary
 * CIDR is configured it is associated with the VPC and private subnets are
 * created from `secondarySubnetCidrs`.
 */
export class VpcProvider implements blueprints.ResourceProvider<ec2.IVpc> {
    readonly vpcId?: string;
    readonly primaryCidr?: string;
    readonly secondaryCidr?: string;
    readonly secondarySubnetCidrs?: string[];

    /**
     * @param vpcId - id of an existing VPC to look up (or `"default"`); when omitted a new VPC is created
     * @param vpcProps - optional primary/secondary CIDR configuration
     */
    constructor(vpcId?: string, private vpcProps?: VpcProps) {
        this.vpcId = vpcId;
        this.primaryCidr = vpcProps?.primaryCidr;
        this.secondaryCidr = vpcProps?.secondaryCidr;
        this.secondarySubnetCidrs = vpcProps?.secondarySubnetCidrs;
    }

    /**
     * Returns the VPC for the blueprint, creating it when no existing VPC was looked up.
     *
     * @param context - resource context providing the construct scope
     * @returns the looked-up or newly created VPC
     */
    provide(context: blueprints.ResourceContext): ec2.IVpc {
        const id = context.scope.node.id;

        let vpc = getVPCFromId(context, id, this.vpcId);
        if (vpc == null) {
            // It will automatically divide the provided VPC CIDR range, and create public and private subnets per Availability Zone.
            // If VPC CIDR range is not provided, uses `10.0.0.0/16` as the range and creates public and private subnets per Availability Zone.
            // Network routing for the public subnets will be configured to allow outbound access directly via an Internet Gateway.
            // Network routing for the private subnets will be configured to allow outbound access via a set of resilient NAT Gateways (one per AZ).
            // Creates Secondary CIDR and Secondary subnets if passed.
            if (this.primaryCidr) {
                vpc = new ec2.Vpc(context.scope, id + "-vpc", {
                    ipAddresses: ec2.IpAddresses.cidr(this.primaryCidr)
                });
            }
            else {
                vpc = new ec2.Vpc(context.scope, id + "-vpc");
            }
        }

        if (this.secondaryCidr) {
            this.createSecondarySubnets(context, id, vpc);
        }

        return vpc;
    }

    /**
     * Associates the secondary CIDR block with the VPC and, when subnet CIDRs
     * are configured, creates one private subnet per availability zone index
     * that has a non-empty CIDR entry. Each created subnet is registered on the
     * context as `secondary-cidr-subnet-<index>` and tagged.
     *
     * @param context - resource context used as construct scope and registry
     * @param id - prefix for the ids of the created constructs
     * @param vpc - VPC that receives the secondary CIDR and subnets
     */
    protected createSecondarySubnets(context: blueprints.ResourceContext, id: string, vpc: ec2.IVpc) {
        const secondaryCidr = new ec2.CfnVPCCidrBlock(context.scope, id + "-secondaryCidr", {
            vpcId: vpc.vpcId,
            cidrBlock: this.secondaryCidr
        });
        secondaryCidr.node.addDependency(vpc);

        if (!this.secondarySubnetCidrs) {
            return;
        }

        // Collect created subnets densely: indexing by AZ position would leave
        // holes for skipped entries, and iterating those holes yields `undefined`.
        const secondarySubnets: Array<PrivateSubnet> = [];
        for (let i = 0; i < vpc.availabilityZones.length; i++) {
            const subnetCidr = this.secondarySubnetCidrs[i];
            if (!subnetCidr) {
                continue;
            }
            const subnet = new ec2.PrivateSubnet(context.scope, id + "private-subnet-" + i, {
                availabilityZone: vpc.availabilityZones[i],
                cidrBlock: subnetCidr,
                vpcId: vpc.vpcId
            });
            // The subnet CIDR lies inside the secondary block, so that block must be associated first.
            subnet.node.addDependency(secondaryCidr);
            context.add("secondary-cidr-subnet-" + i, {
                provide(_context): ISubnet { return subnet; }
            });
            secondarySubnets.push(subnet);
        }

        for (const secondarySubnet of secondarySubnets) {
            Tags.of(secondarySubnet).add("kubernetes.io/role/internal-elb", "1", { applyToLaunchedInstances: true });
            // NOTE(review): the Name tag interpolates the subnet construct itself (its string form),
            // not an index or AZ — kept as-is to avoid changing deployed tags; confirm intent.
            Tags.of(secondarySubnet).add("Name", `blueprint-construct-dev-PrivateSubnet-${secondarySubnet}`, { applyToLaunchedInstances: true });
        }
    }
}
87+
88+
89+
90+
/**
 * Looks up an existing VPC for the cluster when a VPC id was supplied.
 *
 * @param context - resource context whose `scope` the lookup is created in
 * @param nodeId - prefix for the lookup construct id (`nodeId + "-vpc"`)
 * @param vpcId - `"default"` to look up the default VPC, or any other non-empty
 *   value to look up that VPC by id
 * @returns the looked-up VPC, or `undefined` when `vpcId` is missing or empty
 */
export function getVPCFromId(context: blueprints.ResourceContext, nodeId: string, vpcId?: string) {
    // Nothing requested: the caller decides what to do without an existing VPC.
    if (!vpcId) {
        return undefined;
    }

    if (vpcId === "default") {
        console.log(`looking up completely default VPC`);
        return ec2.Vpc.fromLookup(context.scope, nodeId + "-vpc", { isDefault: true });
    }

    console.log(`looking up non-default ${vpcId} VPC`);
    return ec2.Vpc.fromLookup(context.scope, nodeId + "-vpc", { vpcId: vpcId });
}

0 commit comments

Comments
 (0)