Skip to content

Commit ae56090

Browse files
committed
Overhaul the modification matching for the peptide_list.txt
1 parent 56ca8c3 commit ae56090

File tree

4 files changed

+61
-54
lines changed

4 files changed

+61
-54
lines changed

MSFragger-GUI/src/com/dmtavt/fragpipe/tools/skyline/Skyline.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ private static void runSkyline(String skylinePath, Path wd, String skylineVersio
156156

157157
Path peptideListPath = skylineOutputDir.resolve("peptide_list.txt").toAbsolutePath();
158158
WritePeptideList pepWriter = new WritePeptideList();
159-
Map<String, Set<String>> addedMods = pepWriter.writePeptideList(psmTsvFiles, peptideListPath);
159+
Map<Float, Set<String>> addedMods = pepWriter.writePeptideList(psmTsvFiles, peptideListPath);
160160

161161
Path modXmlPath = wd.resolve("mod.xml");
162162
WriteSkyMods writeSkyMods = new WriteSkyMods(modXmlPath, pf, modsMode, matchUnimod, !useSpeclib, addedMods);
@@ -256,6 +256,8 @@ private static void runSkyline(String skylinePath, Path wd, String skylineVersio
256256
}
257257

258258
for (String s : lcmsFiles) {
259+
// todo: Skyline automatically looks for the LC-MS files in specific directories and imports them even though they should not be imported for quant because they are only used for library building
260+
// todo: if there are LC-MS files in the directories where Skyline was looking for the files, the same files will be imported twice
259261
writer.write("--import-file=" + s + " ");
260262
}
261263

MSFragger-GUI/src/com/dmtavt/fragpipe/tools/skyline/WritePeptideList.java

Lines changed: 54 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,21 @@
77
import java.util.regex.Pattern;
88

99
public class WritePeptideList {
10+
1011
private static Map<String, Integer> columns;
11-
private static final Pattern sitePattern = Pattern.compile("(\\d+)\\w\\(");
12-
private static final Pattern massPattern = Pattern.compile("(\\([\\d.]+\\))");
13-
private static final Pattern AApattern = Pattern.compile("\\d?([\\w-]+)\\(");
12+
private static final Pattern varModPattern = Pattern.compile("([0-9]+)([A-Z])\\(([\\d.-]+)\\)");
13+
private static final Pattern nTermModPattern = Pattern.compile("N-term\\(([\\d.-]+)\\)");
14+
private static final Pattern cTermModPattern = Pattern.compile("C-term\\(([\\d.-]+)\\)");
1415

1516
public static final String COL_ASSIGNED_MODS = "Assigned Modifications";
1617
public static final String COL_PEPTIDE = "Peptide";
1718
public static final String COL_CHARGE = "Charge";
1819
public static final String COL_PROTEIN = "Protein";
1920

2021

21-
public Map<String, Set<String>> writePeptideList(Set<Path> psmtsvFiles, Path outputPath) throws IOException {
22+
public Map<Float, Set<String>> writePeptideList(Set<Path> psmtsvFiles, Path outputPath) throws IOException {
2223
Map<String, Set<String>> proteinMap = new HashMap<>();
23-
Map<String, Set<String>> additiveMods = new HashMap<>();
24+
Map<Float, Set<String>> additiveMods = new HashMap<>();
2425

2526
for (Path psmtsv: psmtsvFiles) {
2627
BufferedReader reader = new BufferedReader(new FileReader(psmtsv.toFile()));
@@ -62,31 +63,48 @@ public Map<String, Set<String>> writePeptideList(Set<Path> psmtsvFiles, Path out
6263
* of "+" characters.
6364
* @return
6465
*/
65-
public static String generateModifiedPeptide(String[] psmSplits, Map<String, Integer> columns, boolean addCharge, Map<String, Set<String>> additiveMods) {
66+
public static String generateModifiedPeptide(String[] psmSplits, Map<String, Integer> columns, boolean addCharge, Map<Float, Set<String>> additiveMods) {
6667
String peptide = psmSplits[columns.get(COL_PEPTIDE)];
68+
String mods = psmSplits[columns.get(COL_ASSIGNED_MODS)].trim();
69+
Map<Integer, Float> modMap = new HashMap<>();
70+
71+
Matcher m = nTermModPattern.matcher(mods);
72+
while (m.find()) {
73+
Float f = modMap.get(1);
74+
if (f != null) {
75+
f += Float.parseFloat(m.group(1));
76+
modMap.put(1, f);
77+
additiveMods.computeIfAbsent(f, k -> new HashSet<>()).add("n^");
78+
} else {
79+
modMap.put(1, Float.parseFloat(m.group(1)));
80+
}
81+
}
6782

68-
String[] mods = psmSplits[columns.get(COL_ASSIGNED_MODS)].split(",");
69-
Map<Integer, String> modMap = new TreeMap<>();
70-
for (String mod : mods) {
71-
Matcher siteMatch = sitePattern.matcher(mod);
72-
int site;
73-
if (siteMatch.find()) {
74-
site = Integer.parseInt(siteMatch.group(1));
75-
Matcher massMatch = massPattern.matcher(mod);
76-
if (massMatch.find()) {
77-
if (modMap.containsKey(site)) {
78-
// handle multiple mods (e.g., 5C(57.0215),5C(100.00)) by adding masses together into a single mod
79-
double mass = Double.parseDouble(massMatch.group(1).replace("(", "").replace(")", ""));
80-
mass += Double.parseDouble(modMap.get(site).replace("[", "").replace("]", ""));
81-
modMap.put(site, String.format("[%.5f]", mass));
82-
// add mod to list for appending to mod.xml
83-
additiveMods.computeIfAbsent(String.format("%.4f", mass), k -> new HashSet<>()).add(getSite(mod));
84-
} else {
85-
modMap.put(site, massMatch.group(1).replace("(", "[").replace(")", "]"));
86-
}
87-
}
83+
m = cTermModPattern.matcher(mods);
84+
while (m.find()) {
85+
Float f = modMap.get(peptide.length());
86+
if (f != null) {
87+
f += Float.parseFloat(m.group(1));
88+
modMap.put(peptide.length(), f);
89+
additiveMods.computeIfAbsent(f, k -> new HashSet<>()).add("c^");
90+
} else {
91+
modMap.put(peptide.length(), Float.parseFloat(m.group(1)));
92+
}
93+
}
94+
95+
m = varModPattern.matcher(mods);
96+
while (m.find()) {
97+
int site = Integer.parseInt(m.group(1));
98+
Float f = modMap.get(site);
99+
if (f != null) {
100+
f += Float.parseFloat(m.group(3));
101+
modMap.put(site, f);
102+
additiveMods.computeIfAbsent(f, k -> new HashSet<>()).add(m.group(2));
103+
} else {
104+
modMap.put(site, Float.parseFloat(m.group(3)));
88105
}
89106
}
107+
90108
String modPep = insertMods(peptide, modMap);
91109
int charge = Integer.parseInt(psmSplits[columns.get(COL_CHARGE)]);
92110
String chargeStr = addCharge ? "+".repeat(charge) : "";
@@ -96,34 +114,21 @@ public static String generateModifiedPeptide(String[] psmSplits, Map<String, Int
96114
/**
97115
* Generate a modified peptide String with all Assigned modifications placed within it
98116
*/
99-
private static String insertMods(String peptide, Map<Integer, String> modMap) {
100-
StringBuilder modifiedPeptide = new StringBuilder(peptide);
101-
102-
// Offset to account for insertions
103-
int offset = 0;
104-
105-
// Iterate through the sorted entries and insert the mods
106-
for (Map.Entry<Integer, String> entry : modMap.entrySet()) {
107-
int position = entry.getKey() + offset;
108-
String mod = entry.getValue();
109-
110-
if (position >= 0 && position <= modifiedPeptide.length()) {
111-
modifiedPeptide.insert(position, mod);
112-
offset += mod.length();
117+
private static String insertMods(String peptide, Map<Integer, Float> modMap) {
118+
StringBuilder modifiedPeptide = new StringBuilder(peptide.length());
119+
char[] aas = peptide.toCharArray();
120+
for (int i = 0; i < aas.length; ++i) {
121+
Float f = modMap.get(i + 1);
122+
if (f != null) {
123+
// With more decimal digits, there will be mismatches between the floating point values from FragPipe and Skyline.
124+
modifiedPeptide.append(String.format("%c[%.1f]", aas[i], f));
125+
} else {
126+
modifiedPeptide.append(aas[i]);
113127
}
114128
}
115129
return modifiedPeptide.toString();
116130
}
117131

118-
// Return the AA (or terminus) of a given Assigned Mod
119-
private static String getSite(String mod) {
120-
Matcher m = AApattern.matcher(mod);
121-
if (m.find()) {
122-
return m.group(1);
123-
}
124-
return "";
125-
}
126-
127132
private String initHeader(String header) {
128133
columns = new HashMap<>();
129134
String[] splits = header.split("\t");

MSFragger-GUI/src/com/dmtavt/fragpipe/tools/skyline/WriteSSL.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ public void writeSSL(Set<Path> psmtsvFiles, Path outputPath, boolean isPercolato
6161
sslLine.append(scoreType).append("\t");
6262
sslLine.append(splits[columns.get(COL_SCORE)]).append("\t");
6363
double rt = Double.parseDouble(splits[columns.get(COL_RT)]);
64-
sslLine.append(String.format("%.4f", rt / 60.0)).append("\t"); // RT in minutes
64+
sslLine.append(rt / 60f).append("\t"); // RT in minutes
6565

6666
// add IM if present
6767
if (checkIM) {

MSFragger-GUI/src/com/dmtavt/fragpipe/tools/skyline/WriteSkyMods.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ public class WriteSkyMods {
7272
}
7373
}
7474

75-
public WriteSkyMods(Path path, PropsFile pf, int modsMode, boolean matchUnimod, boolean isSSL, Map<String, Set<String>> addedMods) throws Exception {
75+
public WriteSkyMods(Path path, PropsFile pf, int modsMode, boolean matchUnimod, boolean isSSL, Map<Float, Set<String>> addedMods) throws Exception {
7676
List<Mod> mods = new ArrayList<>(4);
7777

7878
String fixModStr = pf.getProperty("msfragger.table.fix-mods");
@@ -113,8 +113,8 @@ public WriteSkyMods(Path path, PropsFile pf, int modsMode, boolean matchUnimod,
113113
}
114114

115115
// add any combined mods (multiple at one site) found during peptide list generation
116-
for (Map.Entry<String, Set<String>> entry : addedMods.entrySet()) {
117-
mass = Float.parseFloat(entry.getKey());
116+
for (Map.Entry<Float, Set<String>> entry : addedMods.entrySet()) {
117+
mass = entry.getKey();
118118
mods.addAll(convertMods(String.join("", entry.getValue()), true, mass, mass, new ArrayList<>(), new ArrayList<>(), false));
119119
}
120120

0 commit comments

Comments
 (0)