|
1 | 1 | """ |
2 | | -Script to delete downloadable certificates of inactive users from S3, based on RDS MySQL database entries. |
| 2 | +Script to delete downloadable certificates of inactive users from S3 by calling |
| 3 | +the LMS retire_certs_s3 API endpoint. |
3 | 4 |
|
4 | | -Usage: |
5 | | - python retired_user_cert_remover.py --db-host=my-db-host --db-name=my-db --dry-run |
| 5 | +This script no longer connects directly to RDS. All certificate discovery, S3 |
| 6 | +deletion, and database updates are handled by the LMS API endpoint: |
| 7 | + POST /api/certificates/v1/retire_certs_s3 |
| 8 | +
|
| 9 | +The LMS endpoint requires an OAuth token obtained by exchanging client_id / |
| 10 | +client_secret (stored in AWS Secrets Manager) for a bearer token. |
6 | 11 |
|
7 | | -Arguments: |
8 | | - --db-host The RDS database host. |
9 | | - --db-name The database name. |
10 | | - --dry-run Run the script in dry-run mode (logs actions without deleting). |
11 | | - --db-user The RDS database user (also settable via DB_USER env var). |
12 | | - --db-password The RDS database password (also settable via DB_PASSWORD env var). |
| 12 | +Usage: |
| 13 | + python retired_user_cert_remover.py \ |
| 14 | + --lms-host=https://lms.example.com \ |
| 15 | + --client-id=<DOT client id> \ |
| 16 | + --client-secret=<DOT client secret> \ |
| 17 | + [--dry-run] |
13 | 18 |
|
14 | 19 | Environment Variables: |
15 | | - DB_USER Database username (alternative to --db-user). |
16 | | - DB_PASSWORD Database password (alternative to --db-password). |
17 | | -
|
18 | | -Functionality: |
19 | | - - Connects to an RDS MySQL database and fetches certificates for inactive users. |
20 | | - - Targets only certificates with a valid download URL and status 'downloadable'. |
21 | | - - Deletes corresponding certificate files from S3 (verify and download locations). |
22 | | - - Supports dry-run mode to simulate deletions for review. |
23 | | -
|
24 | | -Example: |
25 | | - export DB_USER=admin |
26 | | - export DB_PASSWORD=securepass |
27 | | - python retired_user_cert_remover.py --db-host=mydb.amazonaws.com --db-name=edxapp --dry-run |
| 20 | + LMS_CLIENT_ID OAuth client id (alternative to --client-id). |
| 21 | + LMS_CLIENT_SECRET OAuth client secret (alternative to --client-secret). |
| 22 | +
|
| 23 | +Dry-run: |
| 24 | + Passes ?dry_run=true to the API. The LMS logs what would be deleted without |
| 25 | + making any changes to S3 or the database. |
28 | 26 | """ |
29 | 27 |
|
30 | | -import boto3 |
31 | | -from botocore.exceptions import ClientError |
32 | | -import pymysql |
| 28 | +import logging |
| 29 | +import sys |
| 30 | + |
33 | 31 | import backoff |
34 | 32 | import click |
35 | | -import sys |
36 | | -import logging |
| 33 | +import requests |
| 34 | + |
# Attempt ceilings passed as ``max_tries`` to the backoff decorators used for
# the OAuth token request and the retire_certs_s3 API call respectively.
MAX_TOKEN_ATTEMPTS = 3
MAX_API_ATTEMPTS = 3

# Logging is configured at import time; LOGGER is this module's named logger.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
LOGGER = logging.getLogger(__name__)
42 | 40 |
|
43 | 41 |
|
def get_oauth_token(lms_host, client_id, client_secret):
    """
    Exchange client credentials for a bearer token via LMS DOT.

    POSTs a ``client_credentials`` grant to ``<lms_host>/oauth2/access_token/``.
    The request is retried with exponential backoff, for at most
    MAX_TOKEN_ATTEMPTS attempts, on any ``requests.RequestException``. A 4xx
    response other than 429 is treated as permanent and is not retried:
    repeating a request the server has already rejected (for example because
    the credentials are wrong) cannot succeed.

    Arguments:
        lms_host: Base URL of the LMS; a trailing slash is tolerated.
        client_id: OAuth client id.
        client_secret: OAuth client secret.

    Returns the access token string, or exits with code 1 on failure.
    """
    token_url = f'{lms_host.rstrip("/")}/oauth2/access_token/'

    def _is_permanent_failure(exc):
        # Connection errors and timeouts carry no response, so they stay
        # retryable, as do 5xx and 429. Compare against None explicitly:
        # a requests Response is falsy whenever its status is an error.
        response = getattr(exc, 'response', None)
        status = getattr(response, 'status_code', None)
        return status is not None and 400 <= status < 500 and status != 429

    @backoff.on_exception(
        backoff.expo,
        requests.RequestException,
        max_tries=MAX_TOKEN_ATTEMPTS,
        giveup=_is_permanent_failure,
    )
    def _request():
        response = requests.post(
            token_url,
            data={
                'grant_type': 'client_credentials',
                'client_id': client_id,
                'client_secret': client_secret,
            },
            timeout=30,
        )
        response.raise_for_status()
        return response.json()['access_token']

    try:
        token = _request()
    except Exception as exc:
        # Script-level boundary: any failure here (HTTP error, malformed JSON,
        # missing 'access_token' key) is logged and ends the run.
        LOGGER.error('Failed to obtain OAuth token: %s', exc)
        sys.exit(1)

    LOGGER.info('Successfully obtained OAuth token from %s', token_url)
    return token
51 | 71 |
|
52 | 72 |
|
def call_retire_certs_api(lms_host, token, dry_run):
    """
    Call POST /api/certificates/v1/retire_certs_s3 on the LMS.

    Arguments:
        lms_host: Base URL of the LMS; a trailing slash is tolerated.
        token: Bearer token sent in the Authorization header.
        dry_run: When true, ``dry_run=true`` is added to the query string.

    Returns the parsed JSON response body on HTTP 200, or an empty dict when
    the body is not valid JSON.
    Retries up to MAX_API_ATTEMPTS times on network errors and 5xx responses.
    Exits with code 1 if the call fails entirely after retries, or returns
    any status other than 200 or 207.
    Exits with code 2 if the call returns 207 (partial failure).
    """
    url = f'{lms_host.rstrip("/")}/api/certificates/v1/retire_certs_s3'
    params = {'dry_run': 'true'} if dry_run else {}
    headers = {'Authorization': f'Bearer {token}', 'Content-Type': 'application/json'}

    # NOTE(review): a retry after a timeout or 5xx re-issues the POST; this
    # assumes the endpoint is safe to call repeatedly - confirm with the LMS.
    @backoff.on_exception(backoff.expo, requests.RequestException, max_tries=MAX_API_ATTEMPTS)
    def _request():
        response = requests.post(url, params=params, headers=headers, timeout=600)
        # Retry on 5xx server errors; 2xx/207/4xx are handled below.
        if response.status_code >= 500:
            response.raise_for_status()
        return response

    LOGGER.info('Calling %s (dry_run=%s)', url, dry_run)
    try:
        response = _request()
    except requests.RequestException as exc:
        LOGGER.error('HTTP request to retire_certs_s3 failed after retries: %s', exc)
        sys.exit(1)

    try:
        body = response.json()
    except ValueError:
        # Best effort: the status code still decides the outcome, but make the
        # missing/invalid body visible instead of silently ignoring it.
        LOGGER.warning(
            'retire_certs_s3 response body is not valid JSON (status %s)',
            response.status_code,
        )
        body = {}

    if response.status_code == 200:
        LOGGER.info('retire_certs_s3 completed successfully: %s', body)
        return body

    if response.status_code == 207:
        # Valid JSON need not be an object; guard the key lookups so a list or
        # scalar body still ends in exit code 2 rather than an AttributeError.
        summary = body if isinstance(body, dict) else {}
        LOGGER.warning(
            'retire_certs_s3 completed with partial failures: processed=%s failed=%s',
            summary.get('processed'), summary.get('failed'),
        )
        sys.exit(2)

    LOGGER.error(
        'retire_certs_s3 returned unexpected status %s: %s',
        response.status_code, body,
    )
    sys.exit(1)
108 | 123 |
|
109 | 124 |
|
@click.command()
@click.option(
    '--lms-host',
    required=True,
    help='Base URL of the LMS (e.g. https://lms.edx.org)',
)
@click.option(
    '--client-id',
    envvar='LMS_CLIENT_ID',
    required=True,
    help='OAuth DOT client id',
)
@click.option(
    '--client-secret',
    envvar='LMS_CLIENT_SECRET',
    required=True,
    help='OAuth DOT client secret',
)
@click.option(
    '--dry-run',
    is_flag=True,
    help='Run in dry-run mode without making any changes',
)
def controller(lms_host, client_id, client_secret, dry_run):
    # Documented with comments rather than a docstring: click would surface a
    # docstring as the command's --help text.
    # Step 1: trade the client credentials for a bearer token.
    bearer_token = get_oauth_token(
        lms_host=lms_host,
        client_id=client_id,
        client_secret=client_secret,
    )
    # Step 2: invoke the retirement endpoint with that token; the helper
    # handles logging and exit codes itself.
    call_retire_certs_api(lms_host=lms_host, token=bearer_token, dry_run=dry_run)
119 | 133 |
|
120 | 134 |
|
121 | 135 | if __name__ == '__main__': |
|
0 commit comments