diff --git a/api/maven-api-model/src/main/mdo/maven.mdo b/api/maven-api-model/src/main/mdo/maven.mdo index b3f8478293dd..1ba9e93a1f1a 100644 --- a/api/maven-api-model/src/main/mdo/maven.mdo +++ b/api/maven-api-model/src/main/mdo/maven.mdo @@ -1354,7 +1354,7 @@ */ public String getManagementKey() { if (managementKey == null) { - managementKey = getGroupId() + ":" + getArtifactId() + ":" + getType() + (getClassifier() != null ? ":" + getClassifier() : ""); + managementKey = (getGroupId() + ":" + getArtifactId() + ":" + getType() + (getClassifier() != null ? ":" + getClassifier() : "")).intern(); } return managementKey; } diff --git a/compat/maven-model/src/test/java/org/apache/maven/model/pom/PomMemoryAnalyzer.java b/compat/maven-model/src/test/java/org/apache/maven/model/pom/PomMemoryAnalyzer.java new file mode 100644 index 000000000000..bea49c38170c --- /dev/null +++ b/compat/maven-model/src/test/java/org/apache/maven/model/pom/PomMemoryAnalyzer.java @@ -0,0 +1,377 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.maven.model.pom; + +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.maven.api.model.Model; +import org.apache.maven.model.v4.MavenStaxReader; + +/** + * A utility class that analyzes Maven POM files to identify memory usage patterns and potential memory optimizations. + * This analyzer focuses on identifying duplicate strings and their memory impact across different paths in the POM structure. + * + *

The analyzer processes POM files recursively, tracking string occurrences and their locations within the POM structure. + * It can identify areas where string deduplication could provide significant memory savings.

+ * + *

Usage example:

+ *
+ * PomMemoryAnalyzer analyzer = new PomMemoryAnalyzer();
+ * Model model = reader.read(Files.newInputStream(pomPath));
+ * analyzer.analyzePom(model);
+ * analyzer.printAnalysis();
+ * 
+ * + *

The analysis output includes:

+ * + * + *

This tool is particularly useful for identifying memory optimization opportunities + * in large Maven multi-module projects where POM files may contain significant + * duplicate content.

+ */ +public class PomMemoryAnalyzer { + private final Map> pathStats = new HashMap<>(); + private final Map globalStringFrequency = new HashMap<>(); + private int totalPoms = 0; + + public static void main(String[] args) throws Exception { + if (args.length < 1) { + System.out.println("Usage: PomMemoryAnalyzer "); + System.exit(1); + } + + Path rootDir = Paths.get(args[0]); + PomMemoryAnalyzer analyzer = new PomMemoryAnalyzer(); + MavenStaxReader reader = new MavenStaxReader(); + + // Find all pom.xml files, excluding those under src/ or target/ + Files.walk(rootDir) + .filter(path -> path.getFileName().toString().equals("pom.xml")) + .filter(path -> !containsSrcOrTarget(path)) + .forEach(pomPath -> { + try { + Model model = reader.read(Files.newInputStream(pomPath)); + analyzer.analyzePom(model); + } catch (Exception e) { + System.err.println("Error processing " + pomPath + ": " + e.getMessage()); + } + }); + + // Print analysis + analyzer.printAnalysis(); + } + + private static boolean containsSrcOrTarget(Path pomPath) { + Path parent = pomPath.getParent(); + while (parent != null && parent.getFileName() != null) { + String dirName = parent.getFileName().toString(); + if (dirName.equals("src") || dirName.equals("target")) { + return true; + } + parent = parent.getParent(); + } + return false; + } + + public void analyzePom(Model model) { + totalPoms++; + Set visited = new HashSet<>(); + processModelNode(model, "/project", "project", visited); + } + + private void processModelNode(Object node, String currentPath, String elementName, Set visited) { + if (node == null || !visited.add(node)) { + return; + } + + Class clazz = node.getClass(); + while (clazz != null && !clazz.equals(Object.class)) { + for (Field field : clazz.getDeclaredFields()) { + // Skip static fields and synthetic fields + if (Modifier.isStatic(field.getModifiers()) || field.isSynthetic()) { + continue; + } + + try { + field.setAccessible(true); + Object value = field.get(node); + if (value == null) continue; + + String fullPath = currentPath + "/" + field.getName(); + + if (value instanceof String) { + String strValue = (String) value; + recordString(fullPath, strValue); + globalStringFrequency.merge(strValue, 1, Integer::sum); + } else if (value instanceof List) { + List list = (List) value; + for (Object item : list) { + if (item != null) { + String itemName = getSingular(field.getName()); + processModelNode(item, fullPath + "/" + itemName, itemName, visited); + } + } + } else if (value instanceof Map) { + Map map = (Map) value; + for (Map.Entry entry : map.entrySet()) { + if (entry.getValue() != null) { + processModelNode( + entry.getValue(), + fullPath + "/" + entry.getKey(), + entry.getKey().toString(), + visited); + } + } + } else if (!value.getClass().isPrimitive() + && !value.getClass().getName().startsWith("java.")) { + processModelNode(value, fullPath, field.getName(), visited); + } + } catch (Exception e) { + // Skip inaccessible or problematic fields + } + } + clazz = clazz.getSuperclass(); + } + } + + private String getSingular(String plural) { + if (plural.endsWith("ies")) { + return plural.substring(0, plural.length() - 3) + "y"; + } + if (plural.endsWith("s")) { + return plural.substring(0, plural.length() - 1); + } + return plural; + } + + private void recordString(String path, String value) { + pathStats + .computeIfAbsent(path, k -> new HashMap<>()) + .computeIfAbsent(value, k -> new StringStats()) + .recordOccurrence(value); + } + + public List getPathAnalysisSorted() { + List analysis = new ArrayList<>(); + + for (Map.Entry> entry : pathStats.entrySet()) { + String path = entry.getKey(); + Map stats = entry.getValue(); + + long uniqueStrings = stats.size(); + long totalOccurrences = stats.values().stream() + .mapToLong(StringStats::getOccurrences) + .sum(); + long totalMemory = stats.entrySet().stream() + .mapToLong(e -> e.getKey().length() * e.getValue().getOccurrences() * 2L) + .sum(); + long potentialSavings = stats.entrySet().stream() + .mapToLong(e -> e.getKey().length() * 2L * (e.getValue().getOccurrences() - 1)) + .sum(); + + analysis.add(new PathAnalysis( + path, + uniqueStrings, + totalOccurrences, + totalMemory, + potentialSavings, + (double) totalOccurrences / uniqueStrings, + getMostFrequentValues(stats, 5))); + } + + analysis.sort((a, b) -> Long.compare(b.potentialSavings, a.potentialSavings)); + return analysis; + } + + private List getMostFrequentValues(Map stats, int limit) { + return stats.entrySet().stream() + .map(e -> new ValueFrequency(e.getKey(), e.getValue().getOccurrences())) + .sorted((a, b) -> Long.compare(b.frequency, a.frequency)) + .limit(limit) + .collect(Collectors.toList()); + } + + public void printAnalysis() { + System.out.printf("Analyzed %d POMs%n%n", totalPoms); + + // First, get all paths + List allPaths = getPathAnalysisSorted(); + + // Create groups based on the final path component + Map> groupedPaths = new HashMap<>(); + Map> groupValueFrequencies = new HashMap<>(); + + for (PathAnalysis path : allPaths) { + String finalComponent = path.path.substring(path.path.lastIndexOf('/') + 1); + + // Add path to its group + groupedPaths.computeIfAbsent(finalComponent, k -> new ArrayList<>()).add(path); + + // Aggregate value frequencies for the group + Map groupFreqs = groupValueFrequencies.computeIfAbsent(finalComponent, k -> new HashMap<>()); + for (ValueFrequency vf : path.mostFrequentValues) { + groupFreqs.merge(vf.value, vf.frequency, Long::sum); + } + } + + // Create final group analyses and sort them by total savings + List sortedGroups = groupedPaths.entrySet().stream() + .map(entry -> { + String groupName = entry.getKey(); + List paths = entry.getValue(); + Map valueFreqs = groupValueFrequencies.get(groupName); + + long totalSavings = + paths.stream().mapToLong(p -> p.potentialSavings).sum(); + long totalMemory = + paths.stream().mapToLong(p -> p.totalMemory).sum(); + long totalUnique = valueFreqs.size(); + long totalOccurrences = + valueFreqs.values().stream().mapToLong(l -> l).sum(); + + List topValues = valueFreqs.entrySet().stream() + .map(e -> new ValueFrequency(e.getKey(), e.getValue())) + .sorted((a, b) -> Long.compare(b.frequency, a.frequency)) + .limit(5) + .collect(Collectors.toList()); + + return new GroupAnalysis( + groupName, paths, totalUnique, totalOccurrences, totalMemory, totalSavings, topValues); + }) + .sorted((a, b) -> Long.compare(b.totalSavings, a.totalSavings)) + .collect(Collectors.toList()); + + // Print each group + for (GroupAnalysis group : sortedGroups) { + System.out.printf("%nPaths ending with '%s':%n", group.name); + System.out.printf("Total potential savings: %dKB%n", group.totalSavings / 1024); + System.out.printf("Total memory: %dKB%n", group.totalMemory / 1024); + System.out.printf("Total unique values: %d%n", group.totalUnique); + System.out.printf("Total occurrences: %d%n", group.totalOccurrences); + System.out.printf("Duplication ratio: %.2f%n", (double) group.totalOccurrences / group.totalUnique); + + System.out.println("\nMost frequent values across all paths:"); + for (ValueFrequency v : group.mostFrequentValues) { + System.out.printf(" %-70s %d times%n", v.value, v.frequency); + } + + System.out.println("\nIndividual paths:"); + System.out.println("----------------------------------------"); + for (PathAnalysis path : group.paths.stream() + .sorted((a, b) -> Long.compare(b.potentialSavings, a.potentialSavings)) + .collect(Collectors.toList())) { + System.out.printf( + "%-90s %6dKB %6dKB%n", path.path, path.totalMemory / 1024, path.potentialSavings / 1024); + } + System.out.println(); + } + } + + private static class GroupAnalysis { + final String name; + final List paths; + final long totalUnique; + final long totalOccurrences; + final long totalMemory; + final long totalSavings; + final List mostFrequentValues; + + GroupAnalysis( + String name, + List paths, + long totalUnique, + long totalOccurrences, + long totalMemory, + long totalSavings, + List mostFrequentValues) { + this.name = name; + this.paths = paths; + this.totalUnique = totalUnique; + this.totalOccurrences = totalOccurrences; + this.totalMemory = totalMemory; + this.totalSavings = totalSavings; + this.mostFrequentValues = mostFrequentValues; + } + } + + private static class StringStats { + private long occurrences = 0; + + public void recordOccurrence(String value) { + occurrences++; + } + + public long getOccurrences() { + return occurrences; + } + } + + public static class PathAnalysis { + public final String path; + public final long uniqueStrings; + public final long totalOccurrences; + public final long totalMemory; + public final long potentialSavings; + public final double duplicationRatio; + public final List mostFrequentValues; + + public PathAnalysis( + String path, + long uniqueStrings, + long totalOccurrences, + long totalMemory, + long potentialSavings, + double duplicationRatio, + List mostFrequentValues) { + this.path = path; + this.uniqueStrings = uniqueStrings; + this.totalOccurrences = totalOccurrences; + this.totalMemory = totalMemory; + this.potentialSavings = potentialSavings; + this.duplicationRatio = duplicationRatio; + this.mostFrequentValues = mostFrequentValues; + } + } + + public static class ValueFrequency { + public final String value; + public final long frequency; + + public ValueFrequency(String value, long frequency) { + this.value = value; + this.frequency = frequency; + } + } +} diff --git a/impl/maven-impl/src/main/java/org/apache/maven/impl/model/DefaultModelBuilder.java b/impl/maven-impl/src/main/java/org/apache/maven/impl/model/DefaultModelBuilder.java index 2be7f3d2d769..a39f981a13bd 100644 --- a/impl/maven-impl/src/main/java/org/apache/maven/impl/model/DefaultModelBuilder.java +++ b/impl/maven-impl/src/main/java/org/apache/maven/impl/model/DefaultModelBuilder.java @@ -1275,6 +1275,7 @@ Model doReadFileModel() throws ModelBuilderException { .path(modelSource.getPath()) .rootDirectory(rootDirectory) .inputStream(is) + .transformer(new InliningTransformer()) .build()); } catch (XmlReaderException e) { if (!strict) { @@ -1287,6 +1288,7 @@ Model doReadFileModel() throws ModelBuilderException { .path(modelSource.getPath()) .rootDirectory(rootDirectory) .inputStream(is) + .transformer(new InliningTransformer()) .build()); } catch (XmlReaderException ne) { // still unreadable even in non-strict mode, rethrow original error @@ -2029,4 +2031,23 @@ private boolean containsCoordinates(String message, String groupId, String artif } record GAKey(String groupId, String artifactId) {} + + static class InliningTransformer implements XmlReaderRequest.Transformer { + static final Set CONTEXTS = Set.of( + "groupId", + "artifactId", + "version", + "namespaceUri", + "packaging", + "scope", + "phase", + "layout", + "policy", + "checksumPolicy", + "updatePolicy"); + + public String transform(String input, String context) { + return CONTEXTS.contains(context) ? input.intern() : input; + } + } }