jibbs1703
diff --git a/‎aws_resources/__init__.py renamed to ‎.github/workflows/CI.yaml b/‎aws_resources/__init__.py renamed to ‎.github/workflows/CI.yaml
diff --git a/‎.gitignore
Lines changed: 4 additions & 1 deletion b/‎.gitignore
Lines changed: 4 additions & 1 deletion
diff --git a/‎.pre-commit-config.yaml
Lines changed: 15 additions & 0 deletions b/‎.pre-commit-config.yaml
Lines changed: 15 additions & 0 deletions
diff --git a/‎.sqlfluff
Lines changed: 40 additions & 0 deletions b/‎.sqlfluff
Lines changed: 40 additions & 0 deletions
diff --git a/‎README.md
Lines changed: 1 addition & 1 deletion b/‎README.md
Lines changed: 1 addition & 1 deletion
@@ -6,6 +6,9 @@ __pycache__/
 # C extensions
 *.so
 
+# Ruff
+.ruff_cache/
+
 # Distribution / packaging
 .Python
 build/
@@ -123,7 +126,7 @@ celerybeat.pid
 
 # Environments
 config.ini
-aws_resources/.env
+.env
 .venv
 env/
 venv/
 
@@ -0,0 +1,15 @@
+repos:
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  rev: v0.9.7
+  hooks:
+    - id: ruff
+      args: [ --fix ]
+    - id: ruff-format
+
+- repo: https://github.com/sqlfluff/sqlfluff
+  rev: 2.3.3
+  hooks:
+    - id: sqlfluff-lint
+      args: ["--dialect", "mysql"]
+    - id: sqlfluff-fix
+      args: ["--dialect", "mysql"]
@@ -0,0 +1,40 @@
+[sqlfluff]
+dialect = mysql
+
+[sqlfluff:rules]
+max_line_length = 100
+capitalisation.policy = consistent
+
+[sqlfluff:rules:L010]
+capitalisation_policy = upper
+
+[sqlfluff:rules:L011]
+capitalisation_policy = lower
+
+[sqlfluff:rules:L014]
+capitalisation_policy = lower
+
+[sqlfluff:rules:L016]
+forbid_multiline = True
+
+[sqlfluff:rules:L018]
+require_aliases = True
+
+[sqlfluff:rules:L019]
+comma_style = leading
+
+[sqlfluff:rules:L022]
+aliasing = explicit
+
+[sqlfluff:rules:L025]
+force_enable = True
+
+[sqlfluff:rules:L030]
+require_final_semicolon = True
+
+# Formatting settings
+[sqlfluff:format]
+indent_width = 4
+tab_space_size = 4
+reindent_aligned = True
+strip_whitespace_lines = True
@@ -12,7 +12,7 @@ This pipeline can be modified to source data from various external inputs includ
 application logs, databases, and mobile applications. The steps in the pipeline can be performed using either 
 the Python shell or Pyspark jobs. 
 
-In this project, raw, untransformed data resides in external databases and is initially extracted as .csv files 
+In this project, raw, untransformed data resides in on-premises NoSQL databases and is initially extracted as .csv files
 into a bronze tier S3 bucket. The pipeline processes the raw data and subsequently stores it in
 the appropriate data lake tier as determined by business requirements. The tiers are represented as folders within
 a single S3 bucket for this project. However, each tier should be given a dedicated bucket (as it is in production