Skip to content

Commit ac0a295

Browse files
feat: improve database connection handling and update certificate deletion logic (#335)
* feat: improve database connection handling and update certificate deletion logic
1 parent aed1e16 commit ac0a295

1 file changed

Lines changed: 108 additions & 94 deletions

File tree

util/jenkins/retired_user_cert_remover/retired_user_cert_remover.py

Lines changed: 108 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -1,121 +1,135 @@
11
"""
2-
Script to delete downloadable certificates of inactive users from S3, based on RDS MySQL database entries.
2+
Script to delete downloadable certificates of inactive users from S3 by calling
3+
the LMS retire_certs_s3 API endpoint.
34
4-
Usage:
5-
python retired_user_cert_remover.py --db-host=my-db-host --db-name=my-db --dry-run
5+
This script no longer connects directly to RDS. All certificate discovery, S3
6+
deletion, and database updates are handled by the LMS API endpoint:
7+
POST /api/certificates/v1/retire_certs_s3
8+
9+
The LMS endpoint requires an OAuth token obtained by exchanging client_id /
10+
client_secret (stored in AWS Secrets Manager) for a bearer token.
611
7-
Arguments:
8-
--db-host The RDS database host.
9-
--db-name The database name.
10-
--dry-run Run the script in dry-run mode (logs actions without deleting).
11-
--db-user The RDS database user (also settable via DB_USER env var).
12-
--db-password The RDS database password (also settable via DB_PASSWORD env var).
12+
Usage:
13+
python retired_user_cert_remover.py \
14+
--lms-host=https://lms.example.com \
15+
--client-id=<DOT client id> \
16+
--client-secret=<DOT client secret> \
17+
[--dry-run]
1318
1419
Environment Variables:
15-
DB_USER Database username (alternative to --db-user).
16-
DB_PASSWORD Database password (alternative to --db-password).
17-
18-
Functionality:
19-
- Connects to an RDS MySQL database and fetches certificates for inactive users.
20-
- Targets only certificates with a valid download URL and status 'downloadable'.
21-
- Deletes corresponding certificate files from S3 (verify and download locations).
22-
- Supports dry-run mode to simulate deletions for review.
23-
24-
Example:
25-
export DB_USER=admin
26-
export DB_PASSWORD=securepass
27-
python retired_user_cert_remover.py --db-host=mydb.amazonaws.com --db-name=edxapp --dry-run
20+
LMS_CLIENT_ID OAuth client id (alternative to --client-id).
21+
LMS_CLIENT_SECRET OAuth client secret (alternative to --client-secret).
22+
23+
Dry-run:
24+
Passes ?dry_run=true to the API. The LMS logs what would be deleted without
25+
making any changes to S3 or the database.
2826
"""
2927

30-
import boto3
31-
from botocore.exceptions import ClientError
32-
import pymysql
28+
import logging
29+
import sys
30+
3331
import backoff
3432
import click
35-
import sys
36-
import logging
33+
import requests
34+
35+
MAX_TOKEN_ATTEMPTS = 3
36+
MAX_API_ATTEMPTS = 3
3737

38-
MAX_TRIES = 5
39-
# Configure logging
40-
LOGGER = logging.getLogger(__name__)
4138
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
39+
LOGGER = logging.getLogger(__name__)
4240

4341

44-
class S3BotoWrapper:
45-
def __init__(self):
46-
self.client = boto3.client("s3")
42+
def get_oauth_token(lms_host, client_id, client_secret):
43+
"""
44+
Exchange client credentials for a bearer token via LMS DOT.
45+
46+
Returns the access token string, or logs the error and exits with code 1 on failure.
47+
"""
48+
token_url = f'{lms_host.rstrip("/")}/oauth2/access_token/'
49+
50+
@backoff.on_exception(backoff.expo, requests.RequestException, max_tries=MAX_TOKEN_ATTEMPTS)
51+
def _request():
52+
response = requests.post(
53+
token_url,
54+
data={
55+
'grant_type': 'client_credentials',
56+
'client_id': client_id,
57+
'client_secret': client_secret,
58+
},
59+
timeout=30,
60+
)
61+
response.raise_for_status()
62+
return response.json()['access_token']
4763

48-
@backoff.on_exception(backoff.expo, ClientError, max_tries=MAX_TRIES)
49-
def delete_object(self, bucket, key):
50-
return self.client.delete_object(Bucket=bucket, Key=key)
64+
try:
65+
token = _request()
66+
LOGGER.info('Successfully obtained OAuth token from %s', token_url)
67+
return token
68+
except Exception as exc:
69+
LOGGER.error('Failed to obtain OAuth token: %s', exc)
70+
sys.exit(1)
5171

5272

53-
def fetch_certificates_to_delete(db_host, db_user, db_password, db_name):
73+
def call_retire_certs_api(lms_host, token, dry_run):
74+
"""
75+
Call POST /api/certificates/v1/retire_certs_s3 on the LMS.
76+
77+
Returns the parsed JSON response body.
78+
Makes up to MAX_API_ATTEMPTS attempts, retrying on network errors and 5xx responses.
79+
Exits with code 1 if the call fails entirely after retries.
80+
Exits with code 2 if the call returns 207 (partial failure).
81+
"""
82+
url = f'{lms_host.rstrip("/")}/api/certificates/v1/retire_certs_s3'
83+
params = {'dry_run': 'true'} if dry_run else {}
84+
headers = {'Authorization': f'Bearer {token}', 'Content-Type': 'application/json'}
85+
86+
@backoff.on_exception(backoff.expo, requests.RequestException, max_tries=MAX_API_ATTEMPTS)
87+
def _request():
88+
response = requests.post(url, params=params, headers=headers, timeout=600)
89+
# Raise on 5xx so backoff retries; every other status (200, 207, 4xx, ...) is handled below.
90+
if response.status_code >= 500:
91+
response.raise_for_status()
92+
return response
93+
94+
LOGGER.info('Calling %s (dry_run=%s)', url, dry_run)
5495
try:
55-
connection = pymysql.connect(host=db_host, user=db_user, password=db_password, database=db_name)
56-
cursor = connection.cursor()
57-
logging.info("Running query on database...")
58-
cursor.execute("""
59-
SELECT
60-
au.id as "LMS_USER_ID",
61-
gc.course_id as "COURSE_RUN_ID",
62-
gc.id as "CERTIFICATE_ID",
63-
gc.download_url as "CERTIFICATE_URL",
64-
gc.download_uuid as "DOWNLOAD_UUID",
65-
gc.verify_uuid as "VERIFY_UUID"
66-
FROM
67-
auth_user as au
68-
JOIN
69-
certificates_generatedcertificate as gc
70-
ON
71-
gc.user_id = au.id
72-
WHERE
73-
au.is_active = 0
74-
AND gc.download_url LIKE '%%https://%%'
75-
AND gc.status = 'downloadable'
76-
ORDER BY
77-
LMS_USER_ID,
78-
COURSE_RUN_ID;
79-
""")
80-
result = cursor.fetchall()
81-
cursor.close()
82-
connection.close()
83-
return result
84-
except Exception as ex:
85-
logging.error(f"Database query failed with error: {ex}")
96+
response = _request()
97+
except requests.RequestException as exc:
98+
LOGGER.error('HTTP request to retire_certs_s3 failed after retries: %s', exc)
8699
sys.exit(1)
87100

101+
body = {}
102+
try:
103+
body = response.json()
104+
except ValueError:
105+
pass
106+
107+
if response.status_code == 200:
108+
LOGGER.info('retire_certs_s3 completed successfully: %s', body)
109+
return body
88110

89-
def delete_certificates_from_s3(certificates, dry_run):
90-
s3_client = S3BotoWrapper()
91-
for cert in certificates:
92-
verify_uuid = cert[5] # VERIFY_UUID
93-
download_uuid = cert[4] # DOWNLOAD_UUID
111+
if response.status_code == 207:
112+
LOGGER.warning(
113+
'retire_certs_s3 completed with partial failures: processed=%s failed=%s',
114+
body.get('processed'), body.get('failed'),
115+
)
116+
sys.exit(2)
94117

95-
verify_key = f"cert/{verify_uuid}"
96-
download_key = f"downloads/{download_uuid}/Certificate.pdf"
97-
try:
98-
if dry_run:
99-
logging.info(f"[Dry Run] Would delete {verify_key} from S3")
100-
logging.info(f"[Dry Run] Would delete {download_key} from S3")
101-
else:
102-
logging.info(f"Deleting {verify_key} from S3...")
103-
s3_client.delete_object("verify.edx.org", verify_key)
104-
logging.info(f"Deleting {download_key} from S3...")
105-
s3_client.delete_object("verify.edx.org", download_key)
106-
except ClientError as e:
107-
logging.error(f"Error deleting {verify_key} or {download_key}: {e}")
118+
LOGGER.error(
119+
'retire_certs_s3 returned unexpected status %s: %s',
120+
response.status_code, body,
121+
)
122+
sys.exit(1)
108123

109124

110125
@click.command()
111-
@click.option('--db-host', '-h', required=True, help='Database host')
112-
@click.option('--db-user', envvar='DB_USER', required=True, help='Database user')
113-
@click.option('--db-password', envvar='DB_PASSWORD', required=True, help='Database password')
114-
@click.option('--db-name', '-db', required=True, help='Database name')
115-
@click.option('--dry-run', is_flag=True, help='Run the script in dry-run mode without making any changes')
116-
def controller(db_host, db_user, db_password, db_name, dry_run):
117-
certificates = fetch_certificates_to_delete(db_host, db_user, db_password, db_name)
118-
delete_certificates_from_s3(certificates, dry_run)
126+
@click.option('--lms-host', required=True, help='Base URL of the LMS (e.g. https://lms.edx.org)')
127+
@click.option('--client-id', envvar='LMS_CLIENT_ID', required=True, help='OAuth DOT client id')
128+
@click.option('--client-secret', envvar='LMS_CLIENT_SECRET', required=True, help='OAuth DOT client secret')
129+
@click.option('--dry-run', is_flag=True, help='Run in dry-run mode without making any changes')
130+
def controller(lms_host, client_id, client_secret, dry_run):
131+
token = get_oauth_token(lms_host, client_id, client_secret)
132+
call_retire_certs_api(lms_host, token, dry_run)
119133

120134

121135
if __name__ == '__main__':

0 commit comments

Comments
 (0)