From fca5e2f24e09557ae9d0c8b168e40039725ee450 Mon Sep 17 00:00:00 2001 From: Richard Li <742829+rli@users.noreply.github.com> Date: Thu, 17 Apr 2025 15:47:04 -0700 Subject: [PATCH 1/2] fix(amazonq): switch to ulong to avoid overflow when input is larger than 2gb (#5558) 2GB in bytes > INT_MAX so use ULong, which can handle 18 PB --- ...-d2a9278c-d71f-463f-b899-f56a587829e8.json | 4 + .../settings/CodeWhispererConfigurable.kt | 4 +- .../CodeWhispererSettingsTest.kt | 35 ++++++ .../amazonq/project/ProjectContextProvider.kt | 108 +++++++++--------- .../settings/CodeWhispererSettings.kt | 15 ++- 5 files changed, 104 insertions(+), 62 deletions(-) create mode 100644 .changes/next-release/bugfix-d2a9278c-d71f-463f-b899-f56a587829e8.json diff --git a/.changes/next-release/bugfix-d2a9278c-d71f-463f-b899-f56a587829e8.json b/.changes/next-release/bugfix-d2a9278c-d71f-463f-b899-f56a587829e8.json new file mode 100644 index 00000000000..6356b23fa2c --- /dev/null +++ b/.changes/next-release/bugfix-d2a9278c-d71f-463f-b899-f56a587829e8.json @@ -0,0 +1,4 @@ +{ + "type" : "bugfix", + "description" : "Fix integer overflow when local context index input is larger than 2GB" +} \ No newline at end of file diff --git a/plugins/amazonq/codewhisperer/jetbrains-community/src/software/aws/toolkits/jetbrains/services/codewhisperer/settings/CodeWhispererConfigurable.kt b/plugins/amazonq/codewhisperer/jetbrains-community/src/software/aws/toolkits/jetbrains/services/codewhisperer/settings/CodeWhispererConfigurable.kt index 9589d8c0a96..8d035faf09a 100644 --- a/plugins/amazonq/codewhisperer/jetbrains-community/src/software/aws/toolkits/jetbrains/services/codewhisperer/settings/CodeWhispererConfigurable.kt +++ b/plugins/amazonq/codewhisperer/jetbrains-community/src/software/aws/toolkits/jetbrains/services/codewhisperer/settings/CodeWhispererConfigurable.kt @@ -133,7 +133,7 @@ class CodeWhispererConfigurable(private val project: Project) : 
row(message("aws.settings.codewhisperer.project_context_index_thread")) { intTextField( - range = IntRange(0, 50) + range = CodeWhispererSettings.CONTEXT_INDEX_THREADS ).bindIntText(codeWhispererSettings::getProjectContextIndexThreadCount, codeWhispererSettings::setProjectContextIndexThreadCount) .apply { connect.subscribe( @@ -150,7 +150,7 @@ class CodeWhispererConfigurable(private val project: Project) : row(message("aws.settings.codewhisperer.project_context_index_max_size")) { intTextField( - range = IntRange(1, 4096) + range = CodeWhispererSettings.CONTEXT_INDEX_SIZE ).bindIntText(codeWhispererSettings::getProjectContextIndexMaxSize, codeWhispererSettings::setProjectContextIndexMaxSize) .apply { connect.subscribe( diff --git a/plugins/amazonq/codewhisperer/jetbrains-community/tst/software/aws/toolkits/jetbrains/services/codewhisperer/CodeWhispererSettingsTest.kt b/plugins/amazonq/codewhisperer/jetbrains-community/tst/software/aws/toolkits/jetbrains/services/codewhisperer/CodeWhispererSettingsTest.kt index 94474eefa6a..643831a96a2 100644 --- a/plugins/amazonq/codewhisperer/jetbrains-community/tst/software/aws/toolkits/jetbrains/services/codewhisperer/CodeWhispererSettingsTest.kt +++ b/plugins/amazonq/codewhisperer/jetbrains-community/tst/software/aws/toolkits/jetbrains/services/codewhisperer/CodeWhispererSettingsTest.kt @@ -211,6 +211,41 @@ class CodeWhispererSettingsTest : CodeWhispererTestBase() { assertThat(actual.autoBuildSetting).hasSize(1) assertThat(actual.autoBuildSetting["project1"]).isTrue() } + + @Test + fun `context thread count is returned in range`() { + val sut = CodeWhispererSettings.getInstance() + + mapOf( + 1 to 1, + 0 to 0, + -1 to 0, + 123 to 50, + 50 to 50, + 51 to 50, + ).forEach { s, expected -> + sut.setProjectContextIndexThreadCount(s) + assertThat(sut.getProjectContextIndexThreadCount()).isEqualTo(expected) + } + } + + @Test + fun `context index size is returned in range`() { + val sut = CodeWhispererSettings.getInstance() + + mapOf( 
+ 1 to 1, + 0 to 1, + -1 to 1, + 123 to 123, + 2047 to 2047, + 4096 to 4096, + 4097 to 4096, + ).forEach { s, expected -> + sut.setProjectContextIndexMaxSize(s) + assertThat(sut.getProjectContextIndexMaxSize()).isEqualTo(expected) + } + } } class CodeWhispererSettingUnitTest { diff --git a/plugins/amazonq/shared/jetbrains-community/src/software/aws/toolkits/jetbrains/services/amazonq/project/ProjectContextProvider.kt b/plugins/amazonq/shared/jetbrains-community/src/software/aws/toolkits/jetbrains/services/amazonq/project/ProjectContextProvider.kt index 75ab0df4f46..854124a180f 100644 --- a/plugins/amazonq/shared/jetbrains-community/src/software/aws/toolkits/jetbrains/services/amazonq/project/ProjectContextProvider.kt +++ b/plugins/amazonq/shared/jetbrains-community/src/software/aws/toolkits/jetbrains/services/amazonq/project/ProjectContextProvider.kt @@ -70,7 +70,7 @@ class ProjectContextProvider(val project: Project, private val encoderServer: En data class FileCollectionResult( val files: List, - val fileSize: Int, + val fileSize: Int, // in MB ) // TODO: move to LspMessage.kt @@ -246,59 +246,7 @@ class ProjectContextProvider(val project: Project, private val encoderServer: En } } - private fun willExceedPayloadLimit(currentTotalFileSize: Long, currentFileSize: Long): Boolean { - val maxSize = CodeWhispererSettings.getInstance().getProjectContextIndexMaxSize() - return currentTotalFileSize.let { totalSize -> totalSize > (maxSize * 1024 * 1024 - currentFileSize) } - } - - private fun isBuildOrBin(fileName: String): Boolean { - val regex = Regex("""bin|build|node_modules|venv|\.venv|env|\.idea|\.conda""", RegexOption.IGNORE_CASE) - return regex.find(fileName) != null - } - - fun collectFiles(): FileCollectionResult { - val collectedFiles = mutableListOf() - var currentTotalFileSize = 0L - val allFiles = mutableListOf() - - val projectBaseDirectories = project.getBaseDirectories() - val changeListManager = ChangeListManager.getInstance(project) - - 
projectBaseDirectories.forEach { - VfsUtilCore.visitChildrenRecursively( - it, - object : VirtualFileVisitor(NO_FOLLOW_SYMLINKS) { - // TODO: refactor this along with /dev & codescan file traversing logic - override fun visitFile(file: VirtualFile): Boolean { - if ((file.isDirectory && isBuildOrBin(file.name)) || - !isWorkspaceSourceContent(file, projectBaseDirectories, changeListManager, additionalGlobalIgnoreRulesForStrictSources) || - (file.isFile && file.length > 10 * 1024 * 1024) - ) { - return false - } - if (file.isFile) { - allFiles.add(file) - return false - } - return true - } - } - ) - } - - for (file in allFiles) { - if (willExceedPayloadLimit(currentTotalFileSize, file.length)) { - break - } - collectedFiles.add(file.path) - currentTotalFileSize += file.length - } - - return FileCollectionResult( - files = collectedFiles.toList(), - fileSize = (currentTotalFileSize / 1024 / 1024).toInt() - ) - } + fun collectFiles(): FileCollectionResult = collectFiles(project.getBaseDirectories(), ChangeListManager.getInstance(project)) private fun queryResultToRelevantDocuments(queryResult: List): List { val documents: MutableList = mutableListOf() @@ -358,5 +306,57 @@ class ProjectContextProvider(val project: Project, private val encoderServer: En companion object { private val logger = getLogger() + private val regex = Regex("""bin|build|node_modules|venv|\.venv|env|\.idea|\.conda""", RegexOption.IGNORE_CASE) + private val mega = (1024 * 1024).toULong() + private val tenMb = 10 * mega.toInt() + + private fun willExceedPayloadLimit(maxSize: ULong, currentTotalFileSize: ULong, currentFileSize: Long) = + currentTotalFileSize.let { totalSize -> totalSize > (maxSize - currentFileSize.toUInt()) } + + private fun isBuildOrBin(fileName: String): Boolean = + regex.find(fileName) != null + + fun collectFiles(projectBaseDirectories: Set, changeListManager: ChangeListManager): FileCollectionResult { + val maxSize = CodeWhispererSettings.getInstance() + 
.getProjectContextIndexMaxSize().toULong() * mega + val collectedFiles = mutableListOf() + var currentTotalFileSize = 0UL + val allFiles = mutableListOf() + + projectBaseDirectories.forEach { + VfsUtilCore.visitChildrenRecursively( + it, + object : VirtualFileVisitor(NO_FOLLOW_SYMLINKS) { + // TODO: refactor this along with /dev & codescan file traversing logic + override fun visitFile(file: VirtualFile): Boolean { + if ((file.isDirectory && isBuildOrBin(file.name)) || + !isWorkspaceSourceContent(file, projectBaseDirectories, changeListManager, additionalGlobalIgnoreRulesForStrictSources) || + (file.isFile && file.length > tenMb) + ) { + return false + } + if (file.isFile) { + allFiles.add(file) + return false + } + return true + } + } + ) + } + + for (file in allFiles) { + if (willExceedPayloadLimit(maxSize, currentTotalFileSize, file.length)) { + break + } + collectedFiles.add(file.path) + currentTotalFileSize += file.length.toUInt() + } + + return FileCollectionResult( + files = collectedFiles.toList(), + fileSize = (currentTotalFileSize / 1024u / 1024u).toInt() + ) + } } } diff --git a/plugins/amazonq/shared/jetbrains-community/src/software/aws/toolkits/jetbrains/settings/CodeWhispererSettings.kt b/plugins/amazonq/shared/jetbrains-community/src/software/aws/toolkits/jetbrains/settings/CodeWhispererSettings.kt index 45b96113dcc..30f46dd6636 100644 --- a/plugins/amazonq/shared/jetbrains-community/src/software/aws/toolkits/jetbrains/settings/CodeWhispererSettings.kt +++ b/plugins/amazonq/shared/jetbrains-community/src/software/aws/toolkits/jetbrains/settings/CodeWhispererSettings.kt @@ -92,7 +92,7 @@ class CodeWhispererSettings : PersistentStateComponent Date: Thu, 17 Apr 2025 15:47:31 -0700 Subject: [PATCH 2/2] fix(amazonq): avoid workspace index process failure (#5595) Orphaned http request that should be rejected somehow sneaked into the http request event loop while the FAISS index is not ready, causing the workspace LSP process to terminate, which causes the 
JetBrains IDE to re-initialize the workspace LSP process, which in turn triggers an infinite loop of log output (a log storm) that causes slowness (the log loop issue is fixed in https://github.com/aws/aws-toolkit-jetbrains/pull/5581). Here is the sequence of events that happened: 1. JB starts the workspace LSP, and the LSP then runs tree-sitter parsing to generate the repomap. 2. While step 1 is in progress, the client (user) uses the @workspace feature, which sends a request for a vector index query. Step 1 is usually fast, but for a large repo like https://github.com/elastic/elasticsearch (1.4GB), it takes 6 min. 3. The Node.js event loop is busy, so the client request from step 2 times out. However, the request is cached at the server and becomes an orphaned HTTP request. 4. The moment tree-sitter parsing is done, the Node.js event loop SOMEHOW immediately handles the orphaned request from step 2, with some probability! 5. The vector index is not undefined; it was partially initialized but had no chunks inside, and querying it with 0 chunks caused Faiss to crash, which terminated the LSP process. 6. JB saw java.net.ConnectException: Connection refused, and it then forces the LSP to restart, which restarts the indexing and causes a performance issue. 
--- .../bugfix-de10b7d3-8c8a-4968-88cb-ed28ce3a553e.json | 4 ++++ .../services/amazonq/project/manifest/ManifestManager.kt | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 .changes/next-release/bugfix-de10b7d3-8c8a-4968-88cb-ed28ce3a553e.json diff --git a/.changes/next-release/bugfix-de10b7d3-8c8a-4968-88cb-ed28ce3a553e.json b/.changes/next-release/bugfix-de10b7d3-8c8a-4968-88cb-ed28ce3a553e.json new file mode 100644 index 00000000000..1c00b0e3f21 --- /dev/null +++ b/.changes/next-release/bugfix-de10b7d3-8c8a-4968-88cb-ed28ce3a553e.json @@ -0,0 +1,4 @@ +{ + "type" : "bugfix", + "description" : "Fix workspace index process quits when hitting a race condition" +} \ No newline at end of file diff --git a/plugins/amazonq/shared/jetbrains-community/src/software/aws/toolkits/jetbrains/services/amazonq/project/manifest/ManifestManager.kt b/plugins/amazonq/shared/jetbrains-community/src/software/aws/toolkits/jetbrains/services/amazonq/project/manifest/ManifestManager.kt index 52b95acf976..69eb7ddf78f 100644 --- a/plugins/amazonq/shared/jetbrains-community/src/software/aws/toolkits/jetbrains/services/amazonq/project/manifest/ManifestManager.kt +++ b/plugins/amazonq/shared/jetbrains-community/src/software/aws/toolkits/jetbrains/services/amazonq/project/manifest/ManifestManager.kt @@ -15,7 +15,7 @@ import software.aws.toolkits.jetbrains.core.getTextFromUrl class ManifestManager { private val cloudFrontUrl = "https://aws-toolkit-language-servers.amazonaws.com/q-context/manifest.json" - val currentVersion = "0.1.46" + val currentVersion = "0.1.49" val currentOs = getOs() private val arch = CpuArch.CURRENT private val mapper = jacksonObjectMapper()