diff --git a/test/ci/filter_for_return_values b/test/ci/filter_for_return_values new file mode 100644 index 00000000000..734619a8aff --- /dev/null +++ b/test/ci/filter_for_return_values @@ -0,0 +1,24 @@ +match callExpr( + hasParent(anyOf( + compoundStmt(), + doStmt(hasCondition(expr().bind("cond")))) + ), + unless(hasType(voidType())), + unless(callee(functionDecl(hasName("memcpy")))), + unless(callee(functionDecl(hasName("strcpy")))), + unless(callee(functionDecl(hasName("strcat")))), + unless(callee(functionDecl(hasName("strncpy")))), + unless(callee(functionDecl(hasName("memset")))), + unless(callee(functionDecl(hasName("memmove")))), + unless(callee(functionDecl(hasName("sprintf")))), + unless(callee(functionDecl(hasName("snprintf")))), + unless(callee(functionDecl(hasName("scanf")))), + unless(callee(functionDecl(hasName("sncanf")))), + unless(callee(functionDecl(hasName("printf")))), + unless(callee(functionDecl(hasName("printRow")))), + unless(callee(functionDecl(hasName("puts")))), + unless(callee(functionDecl(hasName("sleep")))), + unless(callee(functionDecl(hasName("printResult")))), + unless(callee(functionDecl(hasName("getchar")))), + unless(callee(functionDecl(hasName("taos_print_row")))), + unless(callee(functionDecl(hasName("fprintf"))))) \ No newline at end of file diff --git a/test/ci/scan_file_path.py b/test/ci/scan_file_path.py index c2d3e4e3da7..765034a2fc9 100644 --- a/test/ci/scan_file_path.py +++ b/test/ci/scan_file_path.py @@ -5,10 +5,15 @@ from datetime import datetime from loguru import logger import getopt +import concurrent.futures +change_file_list = "" +scan_dir = "" +web_server = "" +branch_name = "" -opts, args = getopt.gnu_getopt(sys.argv[1:], 'b:f:w:', [ - 'branch_name=']) +opts, args = getopt.gnu_getopt(sys.argv[1:], 'b:f:w:d:', [ + 'branch_name=', 'filesName=', 'webServer=', 'dir=']) for key, value in opts: if key in ['-h', '--help']: print( @@ -16,15 +21,17 @@ print('-b branch name or PR ID to scan') print('-f change files list') print('-w web server') - + print('-d directory to scan') sys.exit(0) - + if key in ['-b', '--branchName']: branch_name = value if key in ['-f', '--filesName']: change_file_list = value if key in ['-w', '--webServer']: web_server = value + if key in ['-d', '--dir']: + scan_dir = value # the base source code file path @@ -42,6 +49,7 @@ index_community = self_path.find("community") if index_community != -1: TD_project_path = self_path[:index_community] + community_path = os.path.join(TD_project_path, "community") index_TDinternal = TD_project_path.find("TDinternal") # Check if index_TDinternal is valid and set work_path accordingly if index_TDinternal != -1: @@ -54,6 +62,8 @@ index_TDengine = TD_project_path.find("TDengine") if index_TDengine != -1: work_path = TD_project_path[:index_TDengine] + community_path = TD_project_path + # log file path @@ -64,35 +74,35 @@ scan_log_file = f"{log_file_path}/scan_log.txt" logger.add(scan_log_file, rotation="10MB", retention="7 days", level="DEBUG") -#if error happens, open this to debug -# print(self_path,work_path,TD_project_path,log_file_path,change_file_list) # scan result base path scan_result_base_path = f"{log_file_path}/clang_scan_result/" - # the compile commands json file path -# compile_commands_path = f"{work_path}/debugNoSan/compile_commands.json" compile_commands_path = f"{TD_project_path}/debug/compile_commands.json" -#if error happens, open this to debug -# print(f"compile_commands_path:{compile_commands_path}") - -# # replace the docerk worf path with real work path in compile_commands.json -# docker_work_path = "home" -# replace_path= work_path[1:-1] -# replace_path = replace_path.replace("/", "\/") -# sed_command = f"sed -i 's/{docker_work_path}/{replace_path}/g' {compile_commands_path}" -# print(sed_command) -# result = subprocess.run(sed_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) -# logger.debug(f"STDOUT: {result.stdout} STDERR: {result.stderr}") # the ast parser rule for c file clang_scan_rules_path = f"{self_path}/filter_for_return_values" +scan_dir_list = ["source", "include", "docs/examples", "src/plugins"] # # all the c files path will be checked all_file_path = [] +file_res_path = "" + +SCAN_DIRS = ["source", "include", "docs/examples", "src/plugins"] +SCAN_SKIP_FILE_LIST = [ + "tools/taosws-rs/target/release/build/openssl-sys-7811e597b848e397/out/openssl-build/install/include/openssl", + "/test/", + "contrib", + "debug", + "deps", + "source/libs/parser/src/sql.c", + "source/client/jni/windows/win32/bridge/AccessBridgeCalls.c", + "source/libs/decimal/", + "source/libs/azure", +] class CommandExecutor: def __init__(self): @@ -114,50 +124,87 @@ def execute(self, command, timeout=None): raise Exception("Command execution failed: %s" % e) def scan_files_path(source_file_path): - # scan_dir_list = ["source", "include", "docs/examples", "tests/script/api", "src/plugins"] - scan_dir_list = ["source", "include", "docs/examples", "src/plugins"] - scan_skip_file_list = ["/root/charles/TDinternal/community/tools/taosws-rs/target/release/build/openssl-sys-7811e597b848e397/out/openssl-build/install/include/openssl", - "/test/", "contrib", "debug", "deps", "/root/charles/TDinternal/community/source/libs/parser/src/sql.c", "/root/charles/TDinternal/community/source/client/jni/windows/win32/bridge/AccessBridgeCalls.c"] + """Walk directory and append candidate files under SCAN_DIRS.""" for root, dirs, files in os.walk(source_file_path): + if not any(item in root for item in SCAN_DIRS): + continue for file in files: - if any(item in root for item in scan_dir_list): - file_path = os.path.join(root, file) - if (file_path.endswith(".c") or file_path.endswith(".h") or file_path.endswith(".cpp")) and all(item not in file_path for item in scan_skip_file_list): - all_file_path.append(file_path) + file_path = os.path.join(root, file) + if (file_path.endswith(".c") or file_path.endswith(".h") or file_path.endswith(".cpp")) \ + and all(skip not in file_path for skip in SCAN_SKIP_FILE_LIST): + all_file_path.append(file_path) logger.info("Found %s files" % len(all_file_path)) +def add_candidate_file(file_name): + """Normalize a single file token from change list and append if valid. + - If absolute path provided: append it directly (if exists and not skipped). + - Otherwise, build path relative to TD_project_path/community or TD_project_path. + """ + # skip empty + if not file_name: + return + + # If absolute path given, use it directly (but validate extension/skip rules) + if os.path.isabs(file_name): + if not os.path.exists(file_name): + logger.warning(f"Changed file not found (abs): {file_name}") + return + if not (file_name.endswith(".c") or file_name.endswith(".h") or file_name.endswith(".cpp")): + return + if any(skip in file_name for skip in SCAN_SKIP_FILE_LIST): + return + all_file_path.append(file_name) + return + + # only care tokens that contain interesting dirs + if not any(dir_name in file_name for dir_name in SCAN_DIRS): + return + + # skip by pattern + if not (file_name.endswith(".c") or file_name.endswith(".h") or file_name.endswith(".cpp")): + return + if any(skip in file_name for skip in SCAN_SKIP_FILE_LIST): + return + + # build path relative to repo layout + if index_community != -1: + if "enterprise" in file_name: + candidate = os.path.join(TD_project_path, file_name) + else: + candidate = os.path.join(community_path, file_name) + else: + candidate = os.path.join(community_path, file_name) + + if not os.path.exists(candidate): + logger.warning(f"Changed file not found: {candidate}") + return + + all_file_path.append(candidate) + def input_files(change_files): - # scan_dir_list = ["source", "include", "docs/examples", "tests/script/api", "src/plugins"] - scan_dir_list = ["source", "include", "docs/examples", "src/plugins"] - scan_skip_file_list = ["tools/taosws-rs/target/release/build/openssl-sys-7811e597b848e397/out/openssl-build/install/include/openssl", "/test/", "contrib", "debug", "deps", "source/libs/parser/src/sql.c", "source/libs/azure", "source/client/jni/windows/win32/bridge/AccessBridgeCalls.c"] + """Read change list and process each token by add_candidate_file""" with open(change_files, 'r') as file: for line in file: - file_name = line.strip() - if any(dir_name in file_name for dir_name in scan_dir_list): - if (file_name.endswith(".c") or file_name.endswith(".cpp")) and all(dir_name not in file_name for dir_name in scan_skip_file_list): - if "enterprise" in file_name: - file_name = os.path.join(TD_project_path, file_name) - else: - tdc_file_path = os.path.join(TD_project_path, "community/") - file_name = os.path.join(tdc_file_path, file_name) - all_file_path.append(file_name) - print(f"all_file_path:{all_file_path}") + for file_name in line.strip().split(): + add_candidate_file(file_name) + logger.info(f"all_file_path:{all_file_path}") logger.info("Found %s files" % len(all_file_path)) -file_res_path = "" + + def save_scan_res(res_base_path, file_path, out, err): - global file_res_path - file_res_path = os.path.join(res_base_path, file_path.replace(f"{work_path}", "").split(".")[0] + ".txt") - # print(f"file_res_path:{file_res_path},res_base_path:{res_base_path},file_path:{file_path}") + rel_path = file_path.replace(f"{work_path}", "") + base, ext = os.path.splitext(rel_path) + ext_map = {".c": "-c", ".h": "-h", ".cpp": "-cpp"} + suffix = ext_map.get(ext, ext) + file_res_path = os.path.join(res_base_path, base.lstrip(os.sep) + suffix + ".txt") if not os.path.exists(os.path.dirname(file_res_path)): os.makedirs(os.path.dirname(file_res_path)) logger.info("Save scan result to: %s" % file_res_path) - - # save scan result with open(file_res_path, "w") as f: f.write(err) f.write(out) - logger.debug(f"file_res_file: {file_res_path}") + return file_res_path def write_csv(file_path, data): try: @@ -166,12 +213,32 @@ def write_csv(file_path, data): writer.writerows(data) except Exception as ex: raise Exception("Failed to write the csv file: {} with msg: {}".format(file_path, repr(ex))) - + +def scan_one_file(file): + cmd = f"clang-query-16 -p {compile_commands_path} {file} -f {clang_scan_rules_path} 2>&1 | grep -v 'error:' | grep -v 'warning:'" + logger.debug(f"cmd:{cmd}") + try: + stdout, stderr = CommandExecutor().execute(cmd) + lines = stdout.split("\n") + scan_valid = len(lines) >= 2 and (lines[-2].endswith("matches.") or lines[-2].endswith("match.")) + if scan_valid: + match_num = int(lines[-2].split(" ")[0]) + logger.info("The match lines of file %s: %s" % (file, match_num)) + this_file_res_path = save_scan_res(log_file_path, file, stdout, stderr) + return [file, this_file_res_path, match_num, 'Pass' if match_num == 0 else 'Fail'] + else: + logger.warning("The result of scan is invalid for: %s" % file) + this_file_res_path = save_scan_res(log_file_path, file, stdout, stderr) + return [file, this_file_res_path, 0, 'Invalid'] + except Exception as e: + logger.error("Execute command failed: %s" % e) + return [file, "", 0, 'Error'] + if __name__ == "__main__": command_executor = CommandExecutor() # get all the c files path - # scan_files_path(TD_project_path) - input_files(change_file_list) + # scan_files_path("/root/TDinternal/community/source/") + # input_files(change_file_list) # print(f"all_file_path:{all_file_path}") res = [] web_path = [] @@ -183,31 +250,52 @@ def write_csv(file_path, data): # if not os.path.exists(scan_result_path): # os.makedirs(scan_result_path) - for file in all_file_path: - cmd = f"clang-query-16 -p {compile_commands_path} {file} -f {clang_scan_rules_path}" - logger.debug(f"cmd:{cmd}") - try: - stdout, stderr = command_executor.execute(cmd) - #if "error" in stderr: - print(stderr) - lines = stdout.split("\n") - if lines[-2].endswith("matches.") or lines[-2].endswith("match."): - match_num = int(lines[-2].split(" ")[0]) - logger.info("The match lines of file %s: %s" % (file, match_num)) - if match_num > 0: - logger.info(f"log_file_path: {log_file_path} ,file:{file}") - save_scan_res(log_file_path, file, stdout, stderr) - index_tests = file_res_path.find("scan_log") - if index_tests != -1: - web_path_file = file_res_path[index_tests:] - web_path_file = os.path.join(web_server, web_path_file) - web_path.append(web_path_file) - res.append([file, file_res_path, match_num, 'Pass' if match_num == 0 else 'Fail']) - - else: - logger.warning("The result of scan is invalid for: %s" % file) - except Exception as e: - logger.error("Execute command failed: %s" % e) + # 优先用 -d 指定目录,否则用 -f 文件列表,否则默认目录 + if change_file_list: + input_files(change_file_list) + elif scan_dir: + scan_files_path(scan_dir) + else: + for sub_dir in scan_dir_list: + abs_dir = os.path.join(TD_project_path,"community", sub_dir) + print(abs_dir) + if os.path.exists(abs_dir): + scan_files_path(abs_dir) + + all_file_path = list(set(all_file_path)) + # 多进程并发扫描 + with concurrent.futures.ProcessPoolExecutor(max_workers=os.cpu_count()) as executor: + results = list(executor.map(scan_one_file, all_file_path)) + res.extend(results) + + # for file in all_file_path: + # cmd = f"clang-query-16 -p {compile_commands_path} {file} -f {clang_scan_rules_path} 2>&1 | grep -v 'error:' | grep -v 'warning:'" + # logger.debug(f"cmd:{cmd}") + # try: + # stdout, stderr = command_executor.execute(cmd) + # print(stderr) + # lines = stdout.split("\n") + # scan_valid = len(lines) >= 2 and (lines[-2].endswith("matches.") or lines[-2].endswith("match.")) + # if scan_valid: + # match_num = int(lines[-2].split(" ")[0]) + # logger.info("The match lines of file %s: %s" % (file, match_num)) + # this_file_res_path = save_scan_res(log_file_path, file, stdout, stderr) + # if match_num > 0: + # # logger.info(f"log_file_path: {log_file_path} ,file:{file}") + # index_tests = this_file_res_path.find("scan_log") + # if index_tests != -1: + # web_path_file = this_file_res_path[index_tests:] + # web_path_file = os.path.join(web_server, web_path_file) + # web_path.append(web_path_file) + # res.append([file, this_file_res_path, match_num, 'Pass' if match_num == 0 else 'Fail']) + # else: + # logger.warning("The result of scan is invalid for: %s" % file) + # this_file_res_path = save_scan_res(log_file_path, file, stdout, stderr) + # res.append([file, this_file_res_path, 0, 'Invalid']) + # except Exception as e: + # logger.error("Execute command failed: %s" % e) + # this_file_res_path = "" + # res.append([file, this_file_res_path, 0, 'Error']) # data = "" # for item in res: # data += item[0] + "," + str(item[1]) + "\n" @@ -215,15 +303,31 @@ def write_csv(file_path, data): write_csv(os.path.join(log_file_path, "scan_res.txt"), res) scan_result_log = f"{log_file_path}/scan_res.txt" # delete the first element of res - res= res[1:] + res.pop(0) logger.info("The result of scan: \n") logger.info("Total scan files: %s" % len(res)) logger.info("Total match lines: %s" % sum([item[2] for item in res])) logger.info(f"scan log file : {scan_result_log}") logger.info("Pass files: %s" % len([item for item in res if item[3] == 'Pass'])) logger.info("Fail files: %s" % len([item for item in res if item[3] == 'Fail'])) - if len([item for item in res if item[3] == 'Fail']) > 0: - logger.error(f"Scan failed,please check the log file:{scan_result_log}") - for index, failed_result_file in enumerate(web_path): - logger.error(f"failed number: {index}, failed_result_file: {failed_result_file}") - exit(1) + fail_files = [item for item in res if item[3] == 'Fail'] + + logger.error(f"Total failed files: {len(fail_files)}") + if web_server: + # 打印 web_path + for index, fail_item in enumerate(fail_files): + file_res_path = fail_item[1] + index_tests = file_res_path.find("scan_log") + if index_tests != -1: + web_path_file = os.path.join(web_server, file_res_path[index_tests:]) + else: + web_path_file = file_res_path + logger.error(f"failed number: {index+1}, failed_result_file: {web_path_file}") + else: + # 打印本地路径 + for index, fail_item in enumerate(fail_files): + logger.error(f"failed number: {index+1}, failed_result_file: {fail_item[1]}") + + if len(fail_files) > 0: + logger.error(f"Scan failed, please check the log file: {scan_result_log}") + sys.exit(1) \ No newline at end of file