Skip to content

Commit 27e20c0

Browse files
committed
Add TiDB database monitoring support
1 parent 4840571 commit 27e20c0

14 files changed

+1354
-97
lines changed

mysql/changelog.d/20826.changed

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Implement TiDB database monitoring

mysql/datadog_checks/mysql/activity.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,11 @@ def run_job(self):
183183
'Waiting for events_waits_current availability to be determined by the check, skipping run.'
184184
)
185185
if self._check.events_wait_current_enabled is False:
186+
# Skip warning for TiDB as it doesn't have performance schema consumers
187+
if self._check._get_is_tidb(self._db):
188+
self._log.debug("TiDB detected, skipping query activity collection")
189+
return
190+
186191
azure_deployment_type = self._config.cloud_metadata.get("azure", {}).get("deployment_type")
187192
if azure_deployment_type != "flexible_server":
188193
self._check.record_warning(

mysql/datadog_checks/mysql/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ def __init__(self, instance, init_config):
5252
# Backward compatibility: check new names first, then fall back to old names
5353
self.schemas_config: dict = instance.get('collect_schemas', instance.get('schemas_collection', {})) or {}
5454
self.index_config: dict = instance.get('index_metrics', {}) or {}
55+
self.slow_query_config: dict = instance.get('slow_query', {}) or {}
5556
self.collect_blocking_queries = is_affirmative(instance.get('collect_blocking_queries', False))
5657

5758
self.cloud_metadata = {}

mysql/datadog_checks/mysql/mysql.py

Lines changed: 96 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -81,10 +81,12 @@
8181
SQL_REPLICATION_ROLE_AWS_AURORA,
8282
SQL_SERVER_ID_AWS_AURORA,
8383
SQL_SERVER_UUID,
84+
SQL_TIDB_VERSION,
8485
show_replica_status_query,
8586
)
8687
from .statement_samples import MySQLStatementSamples
8788
from .statements import MySQLStatementMetrics
89+
from .tidb_slow_query import TiDBSlowQueryMonitor
8890
from .util import DatabaseConfigurationError, connect_with_session_variables # noqa: F401
8991
from .version_utils import get_version
9092

@@ -121,6 +123,7 @@ def __init__(self, name, init_config, instances):
121123
self._agent_hostname = None
122124
self._database_hostname = None
123125
self._is_aurora = None
126+
self._is_tidb = None
124127
self._performance_schema_enabled = None
125128
self._events_wait_current_enabled = None
126129
self._group_replication_active = None
@@ -146,6 +149,7 @@ def __init__(self, name, init_config, instances):
146149
self._mysql_metadata = MySQLMetadata(self, self._config, self._get_connection_args())
147150
self._query_activity = MySQLActivity(self, self._config, self._get_connection_args())
148151
self._index_metrics = MySqlIndexMetrics(self._config)
152+
self._tidb_slow_query_monitor = None # Initialized later if TiDB is detected
149153
# _database_instance_emitted: limit the collection and transmission of the database instance metadata
150154
self._database_instance_emitted = TTLCache(
151155
maxsize=1,
@@ -288,6 +292,7 @@ def set_resource_tags(self):
288292
def set_version(self, db):
289293
self.version = get_version(db)
290294
self.is_mariadb = self.version.flavor == "MariaDB"
295+
self.is_tidb = self.version.flavor == "TiDB"
291296
self.tag_manager.set_tag("dbms_flavor", self.version.flavor.lower(), replace=True)
292297

293298
def set_server_uuid(self, db):
@@ -314,6 +319,12 @@ def _check_database_configuration(self, db):
314319
self._is_group_replication_active(db)
315320

316321
def _check_performance_schema_enabled(self, db):
322+
# TiDB doesn't have performance_schema but uses information_schema.cluster_statements_summary instead
323+
if self._get_is_tidb(db):
324+
self._performance_schema_enabled = False
325+
self.log.debug("TiDB detected, performance_schema check skipped")
326+
return self._performance_schema_enabled
327+
317328
with closing(db.cursor(CommenterCursor)) as cursor:
318329
cursor.execute("SHOW VARIABLES LIKE 'performance_schema'")
319330
results = dict(cursor.fetchall())
@@ -420,6 +431,15 @@ def check(self, _):
420431
self._query_activity.run_job_loop(dbm_tags)
421432
self._mysql_metadata.run_job_loop(dbm_tags)
422433

434+
# Initialize and run TiDB slow query monitor if this is TiDB
435+
if self._get_is_tidb(db) and self._tidb_slow_query_monitor is None:
436+
self._tidb_slow_query_monitor = TiDBSlowQueryMonitor(
437+
self, self._config, self._get_connection_args()
438+
)
439+
440+
if self._tidb_slow_query_monitor:
441+
self._tidb_slow_query_monitor.run_job_loop(dbm_tags)
442+
423443
# keeping track of these:
424444
self._put_qcache_stats()
425445

@@ -438,6 +458,8 @@ def cancel(self):
438458
self._statement_metrics.cancel()
439459
self._query_activity.cancel()
440460
self._mysql_metadata.cancel()
461+
if self._tidb_slow_query_monitor:
462+
self._tidb_slow_query_monitor.cancel()
441463

442464
def _new_query_executor(self, queries):
443465
return QueryExecutor(
@@ -462,7 +484,8 @@ def _get_runtime_queries(self, db):
462484

463485
if self.performance_schema_enabled:
464486
queries.extend([QUERY_USER_CONNECTIONS])
465-
if self._index_metrics.include_index_metrics:
487+
# TiDB doesn't have mysql.innodb_index_stats table
488+
if self._index_metrics.include_index_metrics and not self._get_is_tidb(db):
466489
queries.extend(self._index_metrics.queries)
467490
self._runtime_queries_cached = self._new_query_executor(queries)
468491
self._runtime_queries_cached.compile_queries()
@@ -582,7 +605,11 @@ def _collect_metrics(self, db, tags):
582605
# Innodb metrics are not available for Aurora reader instances
583606
if self._is_aurora and self._replication_role == "reader":
584607
self.log.debug("Skipping innodb metrics collection for reader instance")
608+
# TiDB does not support InnoDB storage engine
609+
elif self._get_is_tidb(db):
610+
self.log.info("Skipping innodb metrics collection for TiDB instance")
585611
else:
612+
self.log.debug("Collecting InnoDB metrics (not Aurora reader, not TiDB)")
586613
with tracked_query(self, operation="innodb_metrics"):
587614
results.update(self.innodb_stats.get_stats_from_innodb_status(db))
588615
self.innodb_stats.process_innodb_stats(results, self._config.options, metrics)
@@ -593,23 +620,29 @@ def _collect_metrics(self, db, tags):
593620
results['Binlog_space_usage_bytes'] = self._get_binary_log_stats(db)
594621

595622
# Compute key cache utilization metric
596-
key_blocks_unused = collect_scalar('Key_blocks_unused', results)
597-
key_cache_block_size = collect_scalar('key_cache_block_size', results)
598-
key_buffer_size = collect_scalar('key_buffer_size', results)
599-
results['Key_buffer_size'] = key_buffer_size
623+
# TiDB doesn't have MyISAM key cache metrics
624+
if not self._get_is_tidb(db):
625+
key_blocks_unused = collect_scalar('Key_blocks_unused', results)
626+
key_cache_block_size = collect_scalar('key_cache_block_size', results)
627+
key_buffer_size = collect_scalar('key_buffer_size', results)
628+
results['Key_buffer_size'] = key_buffer_size
600629

601-
try:
602-
# can be null if the unit is missing in the user config (4 instead of 4G for eg.)
603-
if key_buffer_size != 0:
604-
key_cache_utilization = 1 - ((key_blocks_unused * key_cache_block_size) / key_buffer_size)
605-
results['Key_cache_utilization'] = key_cache_utilization
606-
607-
results['Key_buffer_bytes_used'] = collect_scalar('Key_blocks_used', results) * key_cache_block_size
608-
results['Key_buffer_bytes_unflushed'] = (
609-
collect_scalar('Key_blocks_not_flushed', results) * key_cache_block_size
610-
)
611-
except TypeError as e:
612-
self.log.error("Not all Key metrics are available, unable to compute: %s", e)
630+
try:
631+
# can be null if the unit is missing in the user config (4 instead of 4G for eg.)
632+
if key_buffer_size != 0 and key_cache_block_size is not None and key_blocks_unused is not None:
633+
key_cache_utilization = 1 - ((key_blocks_unused * key_cache_block_size) / key_buffer_size)
634+
results['Key_cache_utilization'] = key_cache_utilization
635+
636+
if key_cache_block_size is not None:
637+
key_blocks_used = collect_scalar('Key_blocks_used', results)
638+
key_blocks_not_flushed = collect_scalar('Key_blocks_not_flushed', results)
639+
640+
if key_blocks_used is not None:
641+
results['Key_buffer_bytes_used'] = key_blocks_used * key_cache_block_size
642+
if key_blocks_not_flushed is not None:
643+
results['Key_buffer_bytes_unflushed'] = key_blocks_not_flushed * key_cache_block_size
644+
except (TypeError, ZeroDivisionError) as e:
645+
self.log.error("Not all Key metrics are available, unable to compute: %s", e)
613646

614647
metrics.update(VARIABLES_VARS)
615648
metrics.update(INNODB_VARS)
@@ -681,7 +714,10 @@ def _collect_metrics(self, db, tags):
681714
metrics.update(TABLE_VARS)
682715

683716
if self._config.replication_enabled:
684-
if self.performance_schema_enabled and self._group_replication_active:
717+
# TiDB does not support MySQL replication
718+
if self._get_is_tidb(db):
719+
self.log.debug("Skipping replication metrics collection for TiDB instance")
720+
elif self.performance_schema_enabled and self._group_replication_active:
685721
self.log.debug('Collecting group replication metrics.')
686722
with tracked_query(self, operation="group_replication_metrics"):
687723
self._collect_group_replica_metrics(db, results)
@@ -1126,6 +1162,35 @@ def _get_is_aurora(self, db):
11261162

11271163
return self._is_aurora
11281164

1165+
def _get_is_tidb(self, db):
    """
    Tests if the instance is a TiDB database and caches the result.

    Detection runs SQL_TIDB_VERSION on the given connection and looks for the
    substring 'TiDB' in the first column of the returned row.

    :param db: open database connection exposing ``cursor()``.
    :return: True if the server reports a TiDB version string, False otherwise.

    Only a successful probe is cached in ``self._is_tidb``. If the probe itself
    fails (for example because the connection handle is unusable), a warning is
    recorded and False is returned for this call, but the cache is left unset so
    that the next call retries. The version query is valid on every
    MySQL-compatible server, so a failure here says nothing about the flavor and
    must not permanently disable TiDB handling.
    """
    if self._is_tidb is not None:
        return self._is_tidb

    try:
        with closing(db.cursor(CommenterCursor)) as cursor:
            cursor.execute(SQL_TIDB_VERSION)
            result = cursor.fetchone()
    except Exception:
        self.warning(
            "Unable to determine if server is TiDB. If this is a TiDB database, some "
            "information may be unavailable: %s",
            traceback.format_exc(),
        )
        # Deliberately not cached: the detection will be attempted again on the next call.
        return False

    self.log.debug("TiDB detection - VERSION() result: %s", result)

    version = result[0] if result else None
    if isinstance(version, bytes):
        # Some driver configurations hand back raw bytes instead of text.
        version = version.decode('utf-8', 'replace')

    # An empty result set or a NULL version is treated as "not TiDB".
    self._is_tidb = isinstance(version, str) and 'TiDB' in version
    if self._is_tidb:
        self.log.info("Detected TiDB instance")

    return self._is_tidb
1193+
11291194
@classmethod
11301195
def _get_stats_from_status(cls, db):
11311196
with closing(db.cursor(CommenterCursor)) as cursor:
@@ -1164,6 +1229,12 @@ def _check_innodb_engine_enabled(self, db):
11641229
# table. Later is chosen because that involves no string parsing.
11651230
if self._is_innodb_engine_enabled_cached is not None:
11661231
return self._is_innodb_engine_enabled_cached
1232+
1233+
# TiDB doesn't have InnoDB engine
1234+
if self._get_is_tidb(db):
1235+
self._is_innodb_engine_enabled_cached = False
1236+
return self._is_innodb_engine_enabled_cached
1237+
11671238
try:
11681239
with closing(db.cursor(CommenterCursor)) as cursor:
11691240
cursor.execute(SQL_INNODB_ENGINES)
@@ -1439,13 +1510,16 @@ def _report_warnings(self):
14391510

14401511
def _send_database_instance_metadata(self):
14411512
if self.database_identifier not in self._database_instance_emitted:
1513+
# Keep dbms as "mysql" for all MySQL-compatible databases
1514+
# The specific flavor is identified through the dbms_flavor tag
1515+
dbms = "mysql"
14421516
event = {
14431517
"host": self.reported_hostname,
14441518
"port": self._config.port,
14451519
"database_instance": self.database_identifier,
14461520
"database_hostname": self.database_hostname,
14471521
"agent_version": datadog_agent.get_version(),
1448-
"dbms": "mysql",
1522+
"dbms": dbms,
14491523
"kind": "database_instance",
14501524
"collection_interval": self._config.database_instance_collection_interval,
14511525
'dbms_version': self.version.version + '+' + self.version.build,
@@ -1471,6 +1545,9 @@ def set_cluster_tags(self, db):
14711545
if self._group_replication_active:
14721546
self.log.debug("Group replication cluster tags are not currently supported")
14731547
return
1548+
if self._get_is_tidb(db):
1549+
self.log.debug("TiDB cluster tags are not currently supported")
1550+
return
14741551

14751552
replica_status = self._get_replica_replication_status(db)
14761553
if replica_status:

mysql/datadog_checks/mysql/queries.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,9 @@
7171
FROM information_schema.replica_host_status
7272
WHERE server_id = @@aurora_server_id"""
7373

74+
SQL_TIDB_VERSION = """\
75+
SELECT VERSION()"""
76+
7477
SQL_GROUP_REPLICATION_MEMBER = """\
7578
SELECT channel_name, member_state
7679
FROM performance_schema.replication_group_members

mysql/datadog_checks/mysql/statement_samples.py

Lines changed: 79 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -335,32 +335,74 @@ def _cursor_run(self, cursor, query, params=None, obfuscated_params=None, obfusc
335335
)
336336
raise
337337

338+
def _get_tidb_statement_samples(self):
    """
    Read recent statement samples from TiDB's cluster_statements_summary table.

    TiDB has no events_statements_current table, so the sample text comes from
    the QUERY_SAMPLE_TEXT column of information_schema.cluster_statements_summary.
    Only digests with a non-empty sample that were last seen within the past
    minute are returned, newest first, capped at 100 rows.

    :return: whatever ``fetchall()`` yields for the query on a CommenterDictCursor.
    """
    # Columns with no TiDB counterpart are filled with constant placeholders.
    # NOTE(review): EXEC_COUNT is exposed under the alias rows_sent - confirm this
    # mapping is intended rather than a result-row count.
    tidb_samples_sql = """
        SELECT
        SCHEMA_NAME as current_schema,
        QUERY_SAMPLE_TEXT as sql_text,
        DIGEST as digest,
        DIGEST_TEXT as digest_text,
        -- TiDB doesn't have these fields, so we'll use defaults
        0 as end_event_id,
        0 as timer_start,
        0 as uptime,
        unix_timestamp() as now,
        0 as timer_end,
        AVG_LATENCY as timer_wait_ns,
        0 as lock_time_ns,
        SAMPLE_USER as processlist_user,
        '' as processlist_host,
        EXEC_COUNT as rows_sent,
        AVG_AFFECTED_ROWS as rows_affected
        FROM information_schema.cluster_statements_summary
        WHERE QUERY_SAMPLE_TEXT IS NOT NULL
        AND QUERY_SAMPLE_TEXT != ''
        AND LAST_SEEN > DATE_SUB(NOW(), INTERVAL 1 MINUTE)
        ORDER BY LAST_SEEN DESC
        LIMIT 100
        """
    connection = self._get_db_connection()
    with closing(connection.cursor(CommenterDictCursor)) as samples_cursor:
        self._cursor_run(samples_cursor, tidb_samples_sql)
        sample_rows = samples_cursor.fetchall()
    return sample_rows
374+
338375
@tracked_method(agent_check_getter=attrgetter('_check'))
339376
def _get_new_events_statements_current(self):
340377
start = time.time()
341-
with closing(self._get_db_connection().cursor(CommenterDictCursor)) as cursor:
342-
self._cursor_run(
343-
cursor,
344-
"set @uptime = {}".format(UPTIME_SUBQUERY.format(global_status_table=self._global_status_table)),
345-
)
346-
self._cursor_run(cursor, EVENTS_STATEMENTS_CURRENT_QUERY)
347-
rows = cursor.fetchall()
348-
tags = (
349-
self._tags
350-
+ ["events_statements_table:{}".format(EVENTS_STATEMENTS_TABLE)]
351-
+ self._check._get_debug_tags()
352-
)
353-
self._check.histogram(
354-
"dd.mysql.get_new_events_statements.time",
355-
(time.time() - start) * 1000,
356-
tags=tags,
357-
hostname=self._check.reported_hostname,
358-
)
359-
self._check.histogram(
360-
"dd.mysql.get_new_events_statements.rows", len(rows), tags=tags, hostname=self._check.reported_hostname
361-
)
362-
self._log.debug("Read %s rows from %s", len(rows), EVENTS_STATEMENTS_TABLE)
363-
return rows
378+
379+
# Check if this is TiDB and use appropriate query
380+
if self._check._get_is_tidb(self._db):
381+
rows = self._get_tidb_statement_samples()
382+
else:
383+
with closing(self._get_db_connection().cursor(CommenterDictCursor)) as cursor:
384+
self._cursor_run(
385+
cursor,
386+
"set @uptime = {}".format(UPTIME_SUBQUERY.format(global_status_table=self._global_status_table)),
387+
)
388+
self._cursor_run(cursor, EVENTS_STATEMENTS_CURRENT_QUERY)
389+
rows = cursor.fetchall()
390+
391+
table_name = (
392+
"tidb_cluster_statements_summary" if self._check._get_is_tidb(self._db) else EVENTS_STATEMENTS_TABLE
393+
)
394+
tags = self._tags + ["events_statements_table:{}".format(table_name)] + self._check._get_debug_tags()
395+
self._check.histogram(
396+
"dd.mysql.get_new_events_statements.time",
397+
(time.time() - start) * 1000,
398+
tags=tags,
399+
hostname=self._check.reported_hostname,
400+
)
401+
self._check.histogram(
402+
"dd.mysql.get_new_events_statements.rows", len(rows), tags=tags, hostname=self._check.reported_hostname
403+
)
404+
self._log.debug("Read %s rows from %s", len(rows), table_name)
405+
return rows
364406

365407
def _filter_valid_statement_rows(self, rows):
366408
num_sent = 0
@@ -535,6 +577,16 @@ def _get_sample_collection_strategy(self):
535577
self._log.debug("Using cached events_statements_strategy: %s", cached_strategy)
536578
return cached_strategy
537579

580+
# Check if this is TiDB - it doesn't have performance_schema consumers
581+
if self._check._get_is_tidb(self._db):
582+
self._log.debug("TiDB detected, using TiDB statement samples strategy")
583+
collection_interval = self._configured_collection_interval
584+
if collection_interval < 0:
585+
collection_interval = 10 # Default 10 seconds for TiDB
586+
strategy = ("tidb_cluster_statements_summary", collection_interval)
587+
self._collection_strategy_cache["events_statements_strategy"] = strategy
588+
return strategy
589+
538590
enabled_consumers = self._get_enabled_performance_schema_consumers()
539591
if len(enabled_consumers) < 3:
540592
self._enable_events_statements_consumers()
@@ -829,6 +881,10 @@ def _run_fully_qualified_explain_procedure(self, schema, cursor, statement, obfu
829881
raise
830882

831883
def _has_sampled_since_completion(self, row, event_timestamp):
884+
# TiDB doesn't have end_event_id, so always return False for TiDB
885+
if self._check._get_is_tidb(self._db):
886+
return False
887+
832888
# If the query has finished end_event_id will be set
833889
if row['end_event_id']:
834890
query_end_time = self._calculate_timer_end(row)

0 commit comments

Comments
 (0)