From 797ef31052aa299d4d09137a943e62769ab62158 Mon Sep 17 00:00:00 2001 From: Arlevoy Date: Tue, 2 Oct 2018 09:12:17 +0200 Subject: [PATCH 1/3] :memo: (Standard) Write health check route --- ops/health-check-route.s.md | 68 +++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 ops/health-check-route.s.md diff --git a/ops/health-check-route.s.md b/ops/health-check-route.s.md new file mode 100644 index 0000000..78d20cd --- /dev/null +++ b/ops/health-check-route.s.md @@ -0,0 +1,68 @@ +# [Standard] Writing a health check route + +## Owner: Arthur Levoyer + +# Why + +In order to monitor correctly their environments, some Cloud services require a HealthCheck route which returns a status depending on how the API is running. + +## Checks + +- [ ] Do a call to every DB used by the API +- [ ] Send a 2xx status code status in case of API running correctly and 5xx status code if not +- [ ] Do the less data usage DB calls +- [ ] Include a timestamp in order not to reduce the number of successive calls + +## Examples + +In the examples below the API is concentrating calls to one database RDS and one DynamoDB + +### Example 1: Bad example + +```javascript +app.get("/health-check", (req, res) => { + res.status(200).send({ status: "OK" }); +}); +``` + +- There is no call to any of the two databases +- There is no 5xx status code if the API is not running +- There is not timestamp hence the route may be called successively every seconds + +### Example 2: Bad example + +```javascript +app.get("/health", async (req, res) => { + try { + await findAllDataCollectors(); // DataCollectors ~ 100 entries + res.status(200).send({ status: "OK" }); + } catch (error) { + res.status(503).send({ status: "KO" }); + } +}); +``` + +- There is a call to one of the database but not the other +- The call is using too much data +- There is no timestamp +- There is a 503 if the DB is down + +### Example 3: Good example + +```javascript +app.get("/health", async (req, 
res) => { + try { + if (nextFetchingDate && nextFetchingDate > Date.now()) { + return res.status(200).send({ status: 200, message: "OK" }); + } + + nextFetchingDate = Date.now() + TIME_CHECKING_INTERVAL; + await Promise.all([AppsDynamoRepo.getDynamoHealth(), getHealth()]); + + res.status(200).send({ status: 200, message: "OK" }); + } catch (error) { + nextFetchingDate = Date.now(); + handleError(res, error, { status: 503, error: error.message, message: "KO" }); + } +}); +``` From cd24cf7601fb741ec7920a6aba59729653dd3a90 Mon Sep 17 00:00:00 2001 From: Arlevoy Date: Tue, 9 Oct 2018 13:51:36 +0200 Subject: [PATCH 2/3] Take AM feedbacks --- ops/health-check-route.s.md | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/ops/health-check-route.s.md b/ops/health-check-route.s.md index 78d20cd..2a695f1 100644 --- a/ops/health-check-route.s.md +++ b/ops/health-check-route.s.md @@ -4,14 +4,13 @@ # Why -In order to monitor correctly their environments, some Cloud services require a HealthCheck route which returns a status depending on how the API is running. +In order to monitor correctly their environments, backend services require a HealthCheck route which returns a status depending on how the API is running. 
## Checks -- [ ] Do a call to every DB used by the API +- [ ] Make a call to every database used by the API - [ ] Send a 2xx status code status in case of API running correctly and 5xx status code if not -- [ ] Do the less data usage DB calls -- [ ] Include a timestamp in order not to reduce the number of successive calls +- [ ] Make the less data usage database calls: the health check route is likely to be called very often in short period of time ## Examples @@ -27,14 +26,13 @@ app.get("/health-check", (req, res) => { - There is no call to any of the two databases - There is no 5xx status code if the API is not running -- There is not timestamp hence the route may be called successively every seconds ### Example 2: Bad example ```javascript app.get("/health", async (req, res) => { try { - await findAllDataCollectors(); // DataCollectors ~ 100 entries + await RDS.getAllEntries(); res.status(200).send({ status: "OK" }); } catch (error) { res.status(503).send({ status: "KO" }); @@ -44,7 +42,6 @@ app.get("/health", async (req, res) => { - There is a call to one of the database but not the other - The call is using too much data -- There is no timestamp - There is a 503 if the DB is down ### Example 3: Good example @@ -52,17 +49,25 @@ app.get("/health", async (req, res) => { ```javascript app.get("/health", async (req, res) => { try { - if (nextFetchingDate && nextFetchingDate > Date.now()) { - return res.status(200).send({ status: 200, message: "OK" }); - } - - nextFetchingDate = Date.now() + TIME_CHECKING_INTERVAL; - await Promise.all([AppsDynamoRepo.getDynamoHealth(), getHealth()]); - + await Promise.all([DynamoDB.getDynamoHealth(), RDS.getHealth()]); res.status(200).send({ status: 200, message: "OK" }); } catch (error) { - nextFetchingDate = Date.now(); handleError(res, error, { status: 503, error: error.message, message: "KO" }); } }); + +RDS.getHealth = async () => { + await knex.raw("select 1+1 as result"); +}; + +DynamoDB.getDynamoHealth = (): Promise> => { + 
return new Promise((resolve, reject) => { + dynamodb.describeTable({ TableName: dynamodbTableName }, (error, data) => { + if (error) { + return reject(error); + } + resolve(data); + }); + }); +}; ``` From 95ffe1fdf5fe347c553e2e365f94827a34f688b5 Mon Sep 17 00:00:00 2001 From: Arlevoy Date: Thu, 18 Oct 2018 09:17:55 +0200 Subject: [PATCH 3/3] Take LouisZ feedbacks --- ops/health-check-route.s.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ops/health-check-route.s.md b/ops/health-check-route.s.md index 2a695f1..6c5c739 100644 --- a/ops/health-check-route.s.md +++ b/ops/health-check-route.s.md @@ -9,12 +9,12 @@ In order to monitor correctly their environments, backend services require a Hea ## Checks - [ ] Make a call to every database used by the API -- [ ] Send a 2xx status code status in case of API running correctly and 5xx status code if not -- [ ] Make the less data usage database calls: the health check route is likely to be called very often in short period of time +- [ ] Send a 2xx status code if the API is running correctly, and a 5xx status code if not +- [ ] Make database calls retrieving as little data as possible: the health check route is likely to be called very often in a short period of time ## Examples -In the examples below the API is concentrating calls to one database RDS and one DynamoDB +In the examples below, the API makes calls to one RDS database and one DynamoDB table ### Example 1: Bad example @@ -42,7 +42,7 @@ app.get("/health", async (req, res) => { - There is a call to one of the database but not the other - The call is using too much data -- There is a 503 if the DB is down +- There is a 503 if the DB is down: `await RDS.getAllEntries()` then throws an error, hence the catch block is executed ### Example 3: Good example @@ -60,7 +60,7 @@ RDS.getHealth = async () => { await knex.raw("select 1+1 as result"); }; -DynamoDB.getDynamoHealth = (): Promise> => { +DynamoDB.getDynamoHealth = () => { return new 
Promise((resolve, reject) => { dynamodb.describeTable({ TableName: dynamodbTableName }, (error, data) => { if (error) {