Skip to content

Commit b1d164f

Browse files
committed
[MNG-7592] String deduplication in model building
1 parent 8b3e640 commit b1d164f

File tree

3 files changed

+402
-1
lines changed

3 files changed

+402
-1
lines changed

api/maven-api-model/src/main/mdo/maven.mdo

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1354,7 +1354,7 @@
13541354
*/
13551355
public String getManagementKey() {
13561356
if (managementKey == null) {
1357-
managementKey = getGroupId() + ":" + getArtifactId() + ":" + getType() + (getClassifier() != null ? ":" + getClassifier() : "");
1357+
managementKey = (getGroupId() + ":" + getArtifactId() + ":" + getType() + (getClassifier() != null ? ":" + getClassifier() : "")).intern();
13581358
}
13591359
return managementKey;
13601360
}
Lines changed: 380 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,380 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.maven.model.pom;
20+
21+
import org.apache.maven.api.model.Model;
22+
import org.apache.maven.model.v4.MavenStaxReader;
23+
24+
import java.lang.reflect.Field;
25+
import java.lang.reflect.Modifier;
26+
import java.nio.file.Files;
27+
import java.nio.file.Path;
28+
import java.nio.file.Paths;
29+
import java.util.ArrayList;
30+
import java.util.HashMap;
31+
import java.util.HashSet;
32+
import java.util.List;
33+
import java.util.Map;
34+
import java.util.Set;
35+
import java.util.stream.Collectors;
36+
37+
38+
/**
39+
* A utility class that analyzes Maven POM files to identify memory usage patterns and potential memory optimizations.
40+
* This analyzer focuses on identifying duplicate strings and their memory impact across different paths in the POM structure.
41+
*
42+
* <p>The analyzer processes POM files recursively, tracking string occurrences and their locations within the POM structure.
43+
* It can identify areas where string deduplication could provide significant memory savings.</p>
44+
*
45+
* <p>Usage example:</p>
46+
* <pre>
47+
* PomMemoryAnalyzer analyzer = new PomMemoryAnalyzer();
48+
* Model model = reader.read(Files.newInputStream(pomPath));
49+
* analyzer.analyzePom(model);
50+
* analyzer.printAnalysis();
51+
* </pre>
52+
*
53+
* <p>The analysis output includes:</p>
54+
* <ul>
55+
* <li>Total memory usage per POM path</li>
56+
* <li>Potential memory savings through string deduplication</li>
57+
* <li>Most frequent string values and their occurrence counts</li>
58+
* <li>Statistics grouped by POM element types</li>
59+
* </ul>
60+
*
61+
* <p>This tool is particularly useful for identifying memory optimization opportunities
62+
* in large Maven multi-module projects where POM files may contain significant
63+
* duplicate content.</p>
64+
*/
65+
public class PomMemoryAnalyzer {
66+
private final Map<String, Map<String, StringStats>> pathStats = new HashMap<>();
67+
private final Map<String, Integer> globalStringFrequency = new HashMap<>();
68+
private int totalPoms = 0;
69+
70+
public static void main(String[] args) throws Exception {
71+
if (args.length < 1) {
72+
System.out.println("Usage: PomMemoryAnalyzer <directory-with-poms>");
73+
System.exit(1);
74+
}
75+
76+
Path rootDir = Paths.get(args[0]);
77+
PomMemoryAnalyzer analyzer = new PomMemoryAnalyzer();
78+
MavenStaxReader reader = new MavenStaxReader();
79+
80+
// Find all pom.xml files, excluding those under src/ or target/
81+
Files.walk(rootDir)
82+
.filter(path -> path.getFileName().toString().equals("pom.xml"))
83+
.filter(path -> !containsSrcOrTarget(path))
84+
.forEach(pomPath -> {
85+
try {
86+
Model model = reader.read(Files.newInputStream(pomPath));
87+
analyzer.analyzePom(model);
88+
} catch (Exception e) {
89+
System.err.println("Error processing " + pomPath + ": " + e.getMessage());
90+
}
91+
});
92+
93+
// Print analysis
94+
analyzer.printAnalysis();
95+
}
96+
97+
private static boolean containsSrcOrTarget(Path pomPath) {
98+
Path parent = pomPath.getParent();
99+
while (parent != null && parent.getFileName() != null) {
100+
String dirName = parent.getFileName().toString();
101+
if (dirName.equals("src") || dirName.equals("target")) {
102+
return true;
103+
}
104+
parent = parent.getParent();
105+
}
106+
return false;
107+
}
108+
109+
public void analyzePom(Model model) {
110+
totalPoms++;
111+
Set<Object> visited = new HashSet<>();
112+
processModelNode(model, "/project", "project", visited);
113+
}
114+
115+
private void processModelNode(Object node, String currentPath, String elementName, Set<Object> visited) {
116+
if (node == null || !visited.add(node)) {
117+
return;
118+
}
119+
120+
Class<?> clazz = node.getClass();
121+
while (clazz != null && !clazz.equals(Object.class)) {
122+
for (Field field : clazz.getDeclaredFields()) {
123+
// Skip static fields and synthetic fields
124+
if (Modifier.isStatic(field.getModifiers()) || field.isSynthetic()) {
125+
continue;
126+
}
127+
128+
try {
129+
field.setAccessible(true);
130+
Object value = field.get(node);
131+
if (value == null) continue;
132+
133+
String fullPath = currentPath + "/" + field.getName();
134+
135+
if (value instanceof String) {
136+
String strValue = (String) value;
137+
recordString(fullPath, strValue);
138+
globalStringFrequency.merge(strValue, 1, Integer::sum);
139+
} else if (value instanceof List) {
140+
List<?> list = (List<?>) value;
141+
for (Object item : list) {
142+
if (item != null) {
143+
String itemName = getSingular(field.getName());
144+
processModelNode(item, fullPath + "/" + itemName, itemName, visited);
145+
}
146+
}
147+
} else if (value instanceof Map) {
148+
Map<?, ?> map = (Map<?, ?>) value;
149+
for (Map.Entry<?, ?> entry : map.entrySet()) {
150+
if (entry.getValue() != null) {
151+
processModelNode(
152+
entry.getValue(),
153+
fullPath + "/" + entry.getKey(),
154+
entry.getKey().toString(),
155+
visited);
156+
}
157+
}
158+
} else if (!value.getClass().isPrimitive()
159+
&& !value.getClass().getName().startsWith("java.")) {
160+
processModelNode(value, fullPath, field.getName(), visited);
161+
}
162+
} catch (Exception e) {
163+
// Skip inaccessible or problematic fields
164+
}
165+
}
166+
clazz = clazz.getSuperclass();
167+
}
168+
}
169+
170+
private String getSingular(String plural) {
171+
if (plural.endsWith("ies")) {
172+
return plural.substring(0, plural.length() - 3) + "y";
173+
}
174+
if (plural.endsWith("s")) {
175+
return plural.substring(0, plural.length() - 1);
176+
}
177+
return plural;
178+
}
179+
180+
private void recordString(String path, String value) {
181+
pathStats
182+
.computeIfAbsent(path, k -> new HashMap<>())
183+
.computeIfAbsent(value, k -> new StringStats())
184+
.recordOccurrence(value);
185+
}
186+
187+
public List<PathAnalysis> getPathAnalysisSorted() {
188+
List<PathAnalysis> analysis = new ArrayList<>();
189+
190+
for (Map.Entry<String, Map<String, StringStats>> entry : pathStats.entrySet()) {
191+
String path = entry.getKey();
192+
Map<String, StringStats> stats = entry.getValue();
193+
194+
long uniqueStrings = stats.size();
195+
long totalOccurrences = stats.values().stream()
196+
.mapToLong(StringStats::getOccurrences)
197+
.sum();
198+
long totalMemory = stats.entrySet().stream()
199+
.mapToLong(e -> e.getKey().length() * e.getValue().getOccurrences() * 2L)
200+
.sum();
201+
long potentialSavings = stats.entrySet().stream()
202+
.mapToLong(e -> e.getKey().length() * 2L * (e.getValue().getOccurrences() - 1))
203+
.sum();
204+
205+
analysis.add(new PathAnalysis(
206+
path,
207+
uniqueStrings,
208+
totalOccurrences,
209+
totalMemory,
210+
potentialSavings,
211+
(double) totalOccurrences / uniqueStrings,
212+
getMostFrequentValues(stats, 5)));
213+
}
214+
215+
analysis.sort((a, b) -> Long.compare(b.potentialSavings, a.potentialSavings));
216+
return analysis;
217+
}
218+
219+
private List<ValueFrequency> getMostFrequentValues(Map<String, StringStats> stats, int limit) {
220+
return stats.entrySet().stream()
221+
.map(e -> new ValueFrequency(e.getKey(), e.getValue().getOccurrences()))
222+
.sorted((a, b) -> Long.compare(b.frequency, a.frequency))
223+
.limit(limit)
224+
.collect(Collectors.toList());
225+
}
226+
227+
public void printAnalysis() {
228+
System.out.printf("Analyzed %d POMs%n%n", totalPoms);
229+
230+
// First, get all paths
231+
List<PathAnalysis> allPaths = getPathAnalysisSorted();
232+
233+
// Create groups based on the final path component
234+
Map<String, List<PathAnalysis>> groupedPaths = new HashMap<>();
235+
Map<String, Map<String, Long>> groupValueFrequencies = new HashMap<>();
236+
237+
for (PathAnalysis path : allPaths) {
238+
String finalComponent = path.path.substring(path.path.lastIndexOf('/') + 1);
239+
240+
// Add path to its group
241+
groupedPaths.computeIfAbsent(finalComponent, k -> new ArrayList<>()).add(path);
242+
243+
// Aggregate value frequencies for the group
244+
Map<String, Long> groupFreqs = groupValueFrequencies.computeIfAbsent(finalComponent, k -> new HashMap<>());
245+
for (ValueFrequency vf : path.mostFrequentValues) {
246+
groupFreqs.merge(vf.value, vf.frequency, Long::sum);
247+
}
248+
}
249+
250+
// Create final group analyses and sort them by total savings
251+
List<GroupAnalysis> sortedGroups = groupedPaths.entrySet().stream()
252+
.map(entry -> {
253+
String groupName = entry.getKey();
254+
List<PathAnalysis> paths = entry.getValue();
255+
Map<String, Long> valueFreqs = groupValueFrequencies.get(groupName);
256+
257+
long totalSavings =
258+
paths.stream().mapToLong(p -> p.potentialSavings).sum();
259+
long totalMemory =
260+
paths.stream().mapToLong(p -> p.totalMemory).sum();
261+
long totalUnique = valueFreqs.size();
262+
long totalOccurrences =
263+
valueFreqs.values().stream().mapToLong(l -> l).sum();
264+
265+
List<ValueFrequency> topValues = valueFreqs.entrySet().stream()
266+
.map(e -> new ValueFrequency(e.getKey(), e.getValue()))
267+
.sorted((a, b) -> Long.compare(b.frequency, a.frequency))
268+
.limit(5)
269+
.collect(Collectors.toList());
270+
271+
return new GroupAnalysis(
272+
groupName, paths, totalUnique, totalOccurrences, totalMemory, totalSavings, topValues);
273+
})
274+
.sorted((a, b) -> Long.compare(b.totalSavings, a.totalSavings))
275+
.collect(Collectors.toList());
276+
277+
// Print each group
278+
for (GroupAnalysis group : sortedGroups) {
279+
System.out.printf("%nPaths ending with '%s':%n", group.name);
280+
System.out.printf("Total potential savings: %dKB%n", group.totalSavings / 1024);
281+
System.out.printf("Total memory: %dKB%n", group.totalMemory / 1024);
282+
System.out.printf("Total unique values: %d%n", group.totalUnique);
283+
System.out.printf("Total occurrences: %d%n", group.totalOccurrences);
284+
System.out.printf("Duplication ratio: %.2f%n", (double) group.totalOccurrences / group.totalUnique);
285+
286+
System.out.println("\nMost frequent values across all paths:");
287+
for (ValueFrequency v : group.mostFrequentValues) {
288+
System.out.printf(" %-70s %d times%n", v.value, v.frequency);
289+
}
290+
291+
System.out.println("\nIndividual paths:");
292+
System.out.println("----------------------------------------");
293+
for (PathAnalysis path : group.paths.stream()
294+
.sorted((a, b) -> Long.compare(b.potentialSavings, a.potentialSavings))
295+
.collect(Collectors.toList())) {
296+
System.out.printf(
297+
"%-90s %6dKB %6dKB%n", path.path, path.totalMemory / 1024, path.potentialSavings / 1024);
298+
}
299+
System.out.println();
300+
}
301+
}
302+
303+
private static class GroupAnalysis {
304+
final String name;
305+
final List<PathAnalysis> paths;
306+
final long totalUnique;
307+
final long totalOccurrences;
308+
final long totalMemory;
309+
final long totalSavings;
310+
final List<ValueFrequency> mostFrequentValues;
311+
312+
GroupAnalysis(
313+
String name,
314+
List<PathAnalysis> paths,
315+
long totalUnique,
316+
long totalOccurrences,
317+
long totalMemory,
318+
long totalSavings,
319+
List<ValueFrequency> mostFrequentValues) {
320+
this.name = name;
321+
this.paths = paths;
322+
this.totalUnique = totalUnique;
323+
this.totalOccurrences = totalOccurrences;
324+
this.totalMemory = totalMemory;
325+
this.totalSavings = totalSavings;
326+
this.mostFrequentValues = mostFrequentValues;
327+
}
328+
}
329+
330+
private static class StringStats {
331+
private long occurrences = 0;
332+
333+
public void recordOccurrence(String value) {
334+
occurrences++;
335+
}
336+
337+
public long getOccurrences() {
338+
return occurrences;
339+
}
340+
}
341+
342+
public static class PathAnalysis {
343+
public final String path;
344+
public final long uniqueStrings;
345+
public final long totalOccurrences;
346+
public final long totalMemory;
347+
public final long potentialSavings;
348+
public final double duplicationRatio;
349+
public final List<ValueFrequency> mostFrequentValues;
350+
351+
public PathAnalysis(
352+
String path,
353+
long uniqueStrings,
354+
long totalOccurrences,
355+
long totalMemory,
356+
long potentialSavings,
357+
double duplicationRatio,
358+
List<ValueFrequency> mostFrequentValues) {
359+
this.path = path;
360+
this.uniqueStrings = uniqueStrings;
361+
this.totalOccurrences = totalOccurrences;
362+
this.totalMemory = totalMemory;
363+
this.potentialSavings = potentialSavings;
364+
this.duplicationRatio = duplicationRatio;
365+
this.mostFrequentValues = mostFrequentValues;
366+
}
367+
}
368+
369+
public static class ValueFrequency {
370+
public final String value;
371+
public final long frequency;
372+
373+
public ValueFrequency(String value, long frequency) {
374+
this.value = value;
375+
this.frequency = frequency;
376+
}
377+
}
378+
379+
}
380+

0 commit comments

Comments
 (0)