Merge pull request #3034 from actiontech/optimize_00055_rule

littleniannian · web-flow · commit a98763c8e5d0 · 2025-05-09T10:16:27.000+08:00
Optimize 00055 rule
diff --git a/sqle/driver/mysql/plocale/active.en.toml b/sqle/driver/mysql/plocale/active.en.toml
@@ -837,7 +837,7 @@ Rule00054Desc = "Use unsigned BIGINT for primary key fields."
 Rule00054Message = "Use unsigned BIGINT for primary key fields."
 Rule00055Annotation = "MySQL maintains redundant indexes separately, increasing maintenance costs and impacting update performance."
 Rule00055Desc = "Avoid creating redundant indexes."
-Rule00055Message = "An index %v already exists. Index %v is a redundant index."
+Rule00055Message = "%v indexes already exist in the %v fields, and the indexes %v are redundant indexes."
 Rule00056Annotation = "Using non-standard character sets in the database may lead to encoding or decoding issues, resulting in data write failures or garbled query results, affecting database availability."
 Rule00056Desc = "Use specified character sets for tables."
 Rule00056Message = "Use specified character sets for tables: %v."
diff --git a/sqle/driver/mysql/plocale/active.zh.toml b/sqle/driver/mysql/plocale/active.zh.toml
@@ -837,7 +837,7 @@ Rule00054Desc = "建议主键字段使用BIGINT时采用无符号的BIGINT"
 Rule00054Message = "建议主键字段使用BIGINT时采用无符号的BIGINT"
 Rule00055Annotation = "MySQL需要单独维护重复的索引，冗余索引增加维护成本，影响更新性能"
 Rule00055Desc = "不建议创建冗余索引"
-Rule00055Message = "已存在索引 %v , 索引 %v 为冗余索引"
+Rule00055Message = "在 %v 字段已存在 %v 索引，索引 %v 为冗余索引"
 Rule00056Annotation = "数据库内使用非标准的字符集，可能导致字符无法编码或者编码不全引起的乱码，最终出现应用写入数据失败或者查询结果显示乱码，影响数据库服务可用性。"
 Rule00056Desc = "表建议使用指定的字符集"
 Rule00056Message = "表建议使用指定的字符集: %v"
diff --git a/sqle/driver/mysql/plocale/message_zh.go b/sqle/driver/mysql/plocale/message_zh.go
@@ -944,7 +944,7 @@ var (
 	Rule00054Message    = &i18n.Message{ID: "Rule00054Message", Other: "建议主键字段使用BIGINT时采用无符号的BIGINT"}
 	Rule00055Desc       = &i18n.Message{ID: "Rule00055Desc", Other: "不建议创建冗余索引"}
 	Rule00055Annotation = &i18n.Message{ID: "Rule00055Annotation", Other: "MySQL需要单独维护重复的索引，冗余索引增加维护成本，影响更新性能"}
-	Rule00055Message    = &i18n.Message{ID: "Rule00055Message", Other: "已存在索引 %v , 索引 %v 为冗余索引"}
+	Rule00055Message    = &i18n.Message{ID: "Rule00055Message", Other: "在 %v 字段已存在 %v 索引，索引 %v 为冗余索引"}
 	Rule00056Desc       = &i18n.Message{ID: "Rule00056Desc", Other: "表建议使用指定的字符集"}
 	Rule00056Annotation = &i18n.Message{ID: "Rule00056Annotation", Other: "数据库内使用非标准的字符集，可能导致字符无法编码或者编码不全引起的乱码，最终出现应用写入数据失败或者查询结果显示乱码，影响数据库服务可用性。"}
 	Rule00056Message    = &i18n.Message{ID: "Rule00056Message", Other: "表建议使用指定的字符集: %v"}
diff --git a/sqle/driver/mysql/rule/ai/rule_00055.go b/sqle/driver/mysql/rule/ai/rule_00055.go
@@ -1,6 +1,9 @@
 package ai
 
 import (
+	"sort"
+	"strings"
+
 	rulepkg "github.com/actiontech/sqle/sqle/driver/mysql/rule"
 	util "github.com/actiontech/sqle/sqle/driver/mysql/rule/ai/util"
 	driverV2 "github.com/actiontech/sqle/sqle/driver/v2"
@@ -54,22 +57,24 @@ func RuleSQLE00055(input *rulepkg.RuleHandlerInput) error {
 	switch stmt := input.Node.(type) {
 	case *ast.CreateTableStmt:
 		// "create table..."
-		indexes := [][]string{}
-
-		// get index column in column definition
+		colIndexes := map[string][]string{}
+		// 获取列定义中的索引列
 		for _, col := range stmt.Cols {
-			if util.IsColumnHasOption(col, ast.ColumnOptionUniqKey) || util.IsColumnPrimaryKey(col) {
-				indexes = append(indexes, []string{util.GetColumnName(col)})
+			if util.IsColumnHasOption(col, ast.ColumnOptionUniqKey) {
+				colIndexes["UNIQUE["+col.Name.Name.O+"]"] = []string{util.GetColumnName(col)}
+			}
+			if util.IsColumnPrimaryKey(col) {
+				colIndexes["Primary Key"] = []string{util.GetColumnName(col)}
 			}
 		}
 
-		// get index column in table constraint
-		indexes = append(indexes, extractIndexesFromConstraints(util.GetTableConstraints(stmt.Constraints, util.GetIndexConstraintTypes()...))...)
-
-		// check the index is not duplicated
-		for redundant, source := range calculateIndexRedundant(indexes) {
-			rulepkg.AddResult(input.Res, input.Rule, SQLE00055, indexes[source], indexes[redundant])
-		}
+		// 获取表约束中的索引列
+		constraintsIndexes := extractIndexesFromConstraints(util.GetTableConstraints(stmt.Constraints, util.GetIndexConstraintTypes()...))
+		// 此处要聚合到一个map中，否则无法获取存在于表约束中的冗余索引
+		mergeMap := mergeIndexsMaps(colIndexes, constraintsIndexes)
+		// 获取冗余索引
+		existsIndexs, redundantIndexs := GroupDuplicatesByValue(mergeMap)
+		buildAuditResult(input, existsIndexs, redundantIndexs)
 
 	case *ast.CreateIndexStmt:
 		// "create index..."
@@ -80,12 +85,11 @@ func RuleSQLE00055(input *rulepkg.RuleHandlerInput) error {
 		}
 
 		indexes := extractIndexesFromConstraints(util.GetTableConstraints(createTableStmt.Constraints, util.GetIndexConstraintTypes()...))
-		newIndex := extractIndexesFromIndexStmt(stmt.IndexPartSpecifications)
-
-		// check the index is not duplicated
-		for redundant, source := range calculateNewIndexRedundant(indexes, newIndex) {
-			rulepkg.AddResult(input.Res, input.Rule, SQLE00055, indexes[source], newIndex[redundant])
-		}
+		newIndexMap := make(map[string][]string)
+		newIndexMap[stmt.IndexName] = extractIndexesFromIndexStmt(stmt.IndexPartSpecifications)
+		// 获取冗余索引
+		existsIndexs, redundantIndexs := GetIntersectionByValue(indexes, newIndexMap)
+		buildAuditResult(input, existsIndexs, redundantIndexs)
 
 	case *ast.AlterTableStmt:
 		// "alter table"
@@ -96,25 +100,39 @@ func RuleSQLE00055(input *rulepkg.RuleHandlerInput) error {
 		}
 
 		indexes := extractIndexesFromConstraints(util.GetTableConstraints(createTableStmt.Constraints, util.GetIndexConstraintTypes()...))
-
-		newIndex := [][]string{}
+		constraints := make([]*ast.Constraint, 0)
 		for _, spec := range util.GetAlterTableCommandsByTypes(stmt, ast.AlterTableAddConstraint) {
 			// "alter table... add index..."
-			indexCols := extractIndexesFromConstraints(util.GetTableConstraints([]*ast.Constraint{spec.Constraint}, util.GetIndexConstraintTypes()...))
-			newIndex = append(newIndex, indexCols...)
-
-		}
-		// check the index is not duplicated
-		for redundant, source := range calculateNewIndexRedundant(indexes, newIndex) {
-			rulepkg.AddResult(input.Res, input.Rule, SQLE00055, indexes[source], newIndex[redundant])
+			constraints = append(constraints, spec.Constraint)
 		}
+
+		// // 获取冗余索引
+		newIndexs := extractIndexesFromConstraints(util.GetTableConstraints(constraints, util.GetIndexConstraintTypes()...))
+		existsIndexs, redundantIndexs := GetIntersectionByValue(indexes, newIndexs)
+		buildAuditResult(input, existsIndexs, redundantIndexs)
 	}
 
 	return nil
 }
 
-func extractIndexesFromConstraints(constraints []*ast.Constraint) [][]string {
-	indexes := [][]string{}
+func buildAuditResult(input *rulepkg.RuleHandlerInput, existsIndexs, redundantIndexs map[string][]string) {
+	idxNames := make([]string, 0, len(existsIndexs))
+	idxColNames := make([][]string, 0, len(existsIndexs))
+	redundantIdxNames := make([]string, 0, len(redundantIndexs))
+	for key, idx := range existsIndexs {
+		idxNames = append(idxNames, key)
+		idxColNames = append(idxColNames, idx)
+	}
+	for key := range redundantIndexs {
+		redundantIdxNames = append(redundantIdxNames, key)
+	}
+	if len(idxNames) > 0 && len(idxColNames) > 0 && len(redundantIdxNames) > 0 {
+		rulepkg.AddResult(input.Res, input.Rule, SQLE00055, idxColNames, strings.Join(idxNames, "、"), redundantIdxNames)
+	}
+}
+
+func extractIndexesFromConstraints(constraints []*ast.Constraint) map[string][]string {
+	indexes := make(map[string][]string)
 
 	// Iterate over all constraints to extract index columns.
 	for _, constraint := range constraints {
@@ -130,14 +148,30 @@ func extractIndexesFromConstraints(constraints []*ast.Constraint) [][]string {
 
 		// Only append to indexes if indexCols is not empty, avoiding adding empty index definitions.
 		if len(indexCols) > 0 {
-			indexes = append(indexes, indexCols)
+			key := constraint.Name
+			if key == "" && constraint.Tp == ast.ConstraintPrimaryKey {
+				key = "Primary Key"
+			}
+			indexes[key] = indexCols
 		}
 	}
 
 	return indexes
 }
 
-func extractIndexesFromIndexStmt(index []*ast.IndexPartSpecification) [][]string {
+func mergeIndexsMaps(m1, m2 map[string][]string,
+) map[string][]string {
+	result := make(map[string][]string, len(m1)+len(m2))
+	for k, v := range m1 {
+		result[k] = v
+	}
+	for k, v := range m2 {
+		result[k] = v
+	}
+	return result
+}
+
+func extractIndexesFromIndexStmt(index []*ast.IndexPartSpecification) []string {
 	// Initialize the slice that will hold the column names for the index.
 	var indexCols []string
 
@@ -150,73 +184,129 @@ func extractIndexesFromIndexStmt(index []*ast.IndexPartSpecification) [][]string
 
 	// Return the indexes as a slice of a single index if indexCols is not empty.
 	if len(indexCols) > 0 {
-		return [][]string{indexCols}
+		return indexCols
 	}
-	return [][]string{} // Return an empty slice of indexes if no index columns were found.
+	return []string{} // Return an empty slice of indexes if no index columns were found.
 }
 
-// calculateIndexRedundant takes a slice of string slices representing indexes and their columns
-// and returns a map where the key is the redundant index and the value is the corresponding
-// original index that makes it redundant.
-func calculateIndexRedundant(indexes [][]string) map[int]int {
-	redundantIndexes := make(map[int]int)
-
-	// Compare each index with every other index to check for redundancy.
-	for i, indexColumns := range indexes {
-		for j, otherIndexColumns := range indexes {
-			// Skip comparing the same index.
-			if i == j {
-				continue
-			}
+// GetIntersectionByValue 比较两个映射的值集合，返回两个结果映射：
+//
+//	commonInLeft:  左映射中，其值集合（或其左前缀）出现在右映射中的条目
+//	commonInRight: 右映射中，其值集合（或其左前缀）出现在左映射中的条目
+//
+// 参数：
+//
+//	leftMap  - 键到字符串切片的映射，作为左侧数据源
+//	rightMap - 键到字符串切片的映射，作为右侧数据源
+//
+// 返回：
+//
+//	commonInLeft  - 所有 leftMap 中，其值（或左前缀）能在 rightMap 中找到的项
+//	commonInRight - 所有 rightMap 中，其值（或左前缀）能在 leftMap 中找到的项
+func GetIntersectionByValue(leftMap, rightMap map[string][]string) (commonInLeft map[string][]string, commonInRight map[string][]string) {
+
+	// 初始化返回容器
+	commonInLeft = make(map[string][]string)
+	commonInRight = make(map[string][]string)
+
+	// --- 构建 rightFullIndex：右侧完整切片的快速查找索引 ---
+	// 示例：rightMap 中有 entry ["a","b","c"]，则索引中添加 "a,b,c"
+	rightFullIndex := make(map[string]bool)
+	for _, values := range rightMap {
+		fullKey := strings.Join(values, ",")
+		rightFullIndex[fullKey] = true
+	}
 
-			// Check if indexColumns are redundant with respect to otherIndexColumns.
-			if isRedundant(indexColumns, otherIndexColumns) {
-				// If index i is redundant and either it's not in the map yet, or it's in the map
-				// but the current source index j has fewer columns (and thus is a 'stronger' source of redundancy),
-				// then add/update the map with the index pair (i, j).
-				if _, exists := redundantIndexes[i]; !exists || len(indexes[redundantIndexes[i]]) > len(otherIndexColumns) {
-					redundantIndexes[i] = j
-				}
+	// --- 扫描 leftMap，检测左侧切片或其左前缀是否命中 rightFullIndex ---
+	for key, values := range leftMap {
+		// 从最长前缀到最短前缀依次尝试
+		for length := len(values); length > 0; length-- {
+			prefixKey := strings.Join(values[:length], ",")
+			if rightFullIndex[prefixKey] {
+				// 一旦命中，记录该 leftMap 条目为 commonInLeft，跳出前缀循环
+				commonInLeft[key] = copySlice(values)
+				break
 			}
 		}
 	}
 
-	return redundantIndexes
+	// --- 构造 leftPrefixIndex：左侧所有可能前缀的索引 ---
+	// 为了对称地支持右侧完整切片的匹配，需要将左侧每个 values 的所有左前缀都注册
+	// 例如 values=["a","b","c"]，则注册 "a"，"a,b"，"a,b,c"
+	leftPrefixIndex := make(map[string]bool)
+	for _, values := range leftMap {
+		for i := 1; i <= len(values); i++ {
+			sig := strings.Join(values[:i], ",")
+			leftPrefixIndex[sig] = true
+		}
+	}
+
+	// --- 扫描 rightMap，检测右侧完整切片是否命中 leftPrefixIndex ---
+	for key, values := range rightMap {
+		fullKey := strings.Join(values, ",")
+		if leftPrefixIndex[fullKey] {
+			// 如果右侧完整切片恰好是左侧某个切片的左前缀，则记录为 commonInRight
+			commonInRight[key] = copySlice(values)
+		}
+	}
+
+	return
 }
 
-// calculateNewIndexRedundant takes a slice of string slices representing existing indexes and their columns
-// and a slice of string slices representing new indexes' columns. It returns a map where the key is the index
-// of the new index in `newIndexes` and the value is the index of the existing index in `indexes` that makes it redundant.
-func calculateNewIndexRedundant(indexes [][]string, newIndexes [][]string) map[int]int {
-	redundantIndexes := make(map[int]int)
-
-	// Iterate over each new index
-	for newIndexI, newIndexCols := range newIndexes {
-		// Compare against each existing index
-		for existingIndexI, existingIndexCols := range indexes {
-			if isRedundant(newIndexCols, existingIndexCols) {
-				// If the new index is redundant with respect to an existing index, add to the map.
-				redundantIndexes[newIndexI] = existingIndexI
-				// Since we only care about the first occurrence of redundancy, we can break here.
+// GroupDuplicatesByValue 检测值集合重复的键，返回分组结果：
+// - originals: 每个唯一值集合（或其最长左前缀）首次出现的键及其值
+// - duplicates: 后续出现且与某已登记左前缀匹配的键及其值
+func GroupDuplicatesByValue(inputMap map[string][]string) (originals map[string][]string, duplicates map[string][]string) {
+	originals = make(map[string][]string)
+	duplicates = make(map[string][]string)
+
+	// prefixIndex：将“值切片左前缀签名”映射到首次出现该前缀的 key
+	prefixIndex := make(map[string]string)
+
+	// 为保证稳定性，先对所有 key 排序
+	keys := make([]string, 0, len(inputMap))
+	for k := range inputMap {
+		keys = append(keys, k)
+	}
+	sort.Strings(keys)
+
+	for _, key := range keys {
+		values := inputMap[key]
+		matched := false
+
+		// 从最长前缀到最短前缀尝试匹配
+		for length := len(values); length > 0; length-- {
+			sig := strings.Join(values[:length], ",")
+			if origKey, exists := prefixIndex[sig]; exists {
+				// 找到已有前缀，归为 duplicates
+				if !matched {
+					// 确保 originals 中保留最早出现的完整值
+					if _, ok := originals[origKey]; !ok {
+						originals[origKey] = copySlice(inputMap[origKey])
+					}
+					matched = true
+				}
+				duplicates[key] = copySlice(values)
 				break
 			}
 		}
+		if !matched {
+			// 全新起点，将自身所有左前缀注册
+			for length := len(values); length > 0; length-- {
+				sig := strings.Join(values[:length], ",")
+				prefixIndex[sig] = key
+			}
+		}
 	}
 
-	return redundantIndexes
+	return
 }
 
-// isRedundant checks if the first index is redundant with respect to the second index,
-// meaning all columns in the first index are a leftmost prefix of the second.
-func isRedundant(index1, index2 []string) bool {
-	for i, col := range index1 {
-		// If we reach the end of index2 or the columns differ,
-		// then index1 cannot be a leftmost prefix of index2.
-		if i >= len(index2) || col != index2[i] {
-			return false
-		}
-	}
-	return true
+// copySlice 深拷贝字符串slice
+func copySlice(src []string) []string {
+	dst := make([]string, len(src))
+	copy(dst, src)
+	return dst
 }
 
 // ==== Rule code end ====
diff --git a/sqle/driver/mysql/rule_00055_test.go b/sqle/driver/mysql/rule_00055_test.go