Skip to content

Commit 28d88ef

Browse files
committed
SID to CID rewrite function included.
1 parent 0639986 commit 28d88ef

File tree

1 file changed

+205
-20
lines changed

1 file changed

+205
-20
lines changed

MassBank-Project/MassBank-lib/src/main/java/massbank/cli/AddMetaData.java

Lines changed: 205 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -475,11 +475,17 @@ else if (ret == InchiStatus.ERROR) {
475475
}
476476

477477
public static String fetchCIDFromSID(String sid) {
478-
String apiUrl = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/substance/sid/" + sid + "/cids/JSON";
478+
String apiUrl = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/substance/sid/" + sid + "/JSON";
479479
try {
480480
HttpURLConnection connection = (HttpURLConnection) new URL(apiUrl).openConnection();
481481
connection.setRequestMethod("GET");
482482

483+
int responseCode = connection.getResponseCode();
484+
if (responseCode != 200) {
485+
System.out.println("Error fetching CID: Server returned HTTP response code " + responseCode);
486+
return null;
487+
}
488+
483489
Scanner scanner = new Scanner(connection.getInputStream());
484490
StringBuilder response = new StringBuilder();
485491
while (scanner.hasNext()) {
@@ -488,38 +494,210 @@ public static String fetchCIDFromSID(String sid) {
488494
scanner.close();
489495

490496
JsonObject jsonResponse = JsonParser.parseString(response.toString()).getAsJsonObject();
491-
JsonArray cid = jsonResponse.getAsJsonObject("InformationList")
492-
.getAsJsonArray("Information")
493-
.get(0).getAsJsonObject()
494-
.getAsJsonArray("CID");
495-
496-
if (cid.size() != 1) {
497-
System.out.println("Error: More than one CID found for SID " + sid);
498-
return null;
499-
}
500-
501-
return cid.get(0).getAsString();
497+
JsonObject information = jsonResponse.getAsJsonArray("PC_Substances")
498+
.get(0).getAsJsonObject();
499+
500+
JsonArray compoundArray = information.getAsJsonArray("compound");
501+
if (compoundArray.size() > 1) {
502+
String cid = information.getAsJsonArray("compound")
503+
.get(1).getAsJsonObject()
504+
.getAsJsonObject("id")
505+
.getAsJsonObject("id")
506+
.getAsJsonPrimitive("cid").getAsString();
507+
508+
String sourceName = information.getAsJsonObject("source")
509+
.getAsJsonObject("db")
510+
.getAsJsonPrimitive("name").getAsString();
511+
512+
String sourceID = information.getAsJsonObject("source")
513+
.getAsJsonObject("db")
514+
.getAsJsonObject("source_id")
515+
.getAsJsonPrimitive("str").getAsString();
516+
return cid;
517+
} else {
518+
System.out.println("No valid CID found in the response.");
519+
return null;
520+
}
502521
} catch (IOException e) {
503522
e.printStackTrace();
504523
System.out.println("Error fetching CID");
505524
return null;
506525
}
507526
}
508527

528+
public static Map<String, String> fetchExternalDBIdFromPubchemCID(String cid) {
529+
String apiUrl = "https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/" + cid + "/JSON";
530+
Map<String, String> externalDatabaseInfo = new HashMap<>();
531+
try {
532+
HttpURLConnection connection = (HttpURLConnection) new URL(apiUrl).openConnection();
533+
connection.setRequestMethod("GET");
534+
535+
Scanner scanner = new Scanner(connection.getInputStream());
536+
StringBuilder response = new StringBuilder();
537+
while (scanner.hasNext()) {
538+
response.append(scanner.nextLine());
539+
}
540+
scanner.close();
541+
542+
JsonObject jsonResponse = JsonParser.parseString(response.toString()).getAsJsonObject();
543+
JsonArray sections = jsonResponse.getAsJsonObject("Record").getAsJsonArray("Section");
544+
545+
for (int i = 0; i < sections.size(); i++) {
546+
JsonObject section = sections.get(i).getAsJsonObject();
547+
if (section.get("TOCHeading").getAsString().equals("Names and Identifiers")) {
548+
JsonArray subSections = section.getAsJsonArray("Section");
549+
for (int j = 0; j < subSections.size(); j++) {
550+
JsonObject subSection = subSections.get(j).getAsJsonObject();
551+
if (subSection.get("TOCHeading").getAsString().equals("Other Identifiers")) {
552+
JsonArray identifiers = subSection.getAsJsonArray("Section");
553+
for (int k = 0; k < identifiers.size(); k++) {
554+
JsonObject identifier = identifiers.get(k).getAsJsonObject();
555+
String dbName = identifier.get("TOCHeading").getAsString();
556+
JsonArray information = identifier.getAsJsonArray("Information");
557+
for (int l = 0; l < information.size(); l++) {
558+
JsonObject info = information.get(l).getAsJsonObject();
559+
if (info.has("Value")) {
560+
JsonArray value = info.getAsJsonObject("Value").getAsJsonArray("StringWithMarkup");
561+
for (int m = 0; m < value.size(); m++) {
562+
JsonObject dbInfo = value.get(m).getAsJsonObject();
563+
String dbValue = dbInfo.get("String").getAsString();
564+
externalDatabaseInfo.put(dbName, dbValue);
565+
}
566+
}
567+
}
568+
}
569+
} else if (subSection.get("TOCHeading").getAsString().equals("Computed Descriptors")) {
570+
JsonArray identifiers = subSection.getAsJsonArray("Section");
571+
for (int k = 0; k < identifiers.size(); k++) {
572+
JsonObject identifier = identifiers.get(k).getAsJsonObject();
573+
String dbName = identifier.get("TOCHeading").getAsString();
574+
if (dbName.equals("InChI")) {
575+
JsonArray information = identifier.getAsJsonArray("Information");
576+
for (int l = 0; l < information.size(); l++) {
577+
JsonObject info = information.get(l).getAsJsonObject();
578+
if (info.has("Value")) {
579+
String inchi = info.getAsJsonObject("Value").getAsJsonArray("StringWithMarkup")
580+
.get(0).getAsJsonObject().get("String").getAsString();
581+
externalDatabaseInfo.put("InChI", inchi);
582+
}
583+
}
584+
}
585+
if (dbName.equals("InChIKey")) {
586+
JsonArray information = identifier.getAsJsonArray("Information");
587+
for (int l = 0; l < information.size(); l++) {
588+
JsonObject info = information.get(l).getAsJsonObject();
589+
if (info.has("Value")) {
590+
String inchi = info.getAsJsonObject("Value").getAsJsonArray("StringWithMarkup")
591+
.get(0).getAsJsonObject().get("String").getAsString();
592+
externalDatabaseInfo.put("INCHIKEY", inchi);
593+
}
594+
}
595+
}
596+
}
597+
}
598+
}
599+
}
600+
}
601+
} catch (IOException e) {
602+
e.printStackTrace();
603+
System.out.println("Error fetching info for CID");
604+
return new HashMap<>();
605+
}
606+
return externalDatabaseInfo;
607+
}
608+
609+
509610
public static String doNormalizeCompoundIdentifier(Record record) {
510611
Map<String, String> inRecord = new HashMap<>();
511-
inRecord.put("Inchi", record.CH_IUPAC());
612+
inRecord.put("InChI", record.CH_IUPAC());
512613
inRecord.put("SMILES", record.CH_SMILES());
513-
if (record.CH_LINK().get("PUBCHEM") != null) {
514-
inRecord.put("PUBCHEM", record.CH_LINK().get("PUBCHEM"));
614+
if (record.CH_LINK() != null) {
615+
inRecord.putAll(record.CH_LINK());
616+
}
617+
618+
Map<String, String> externalDBInfo = new HashMap<>();
619+
String cid = "";
620+
if (inRecord.containsKey("PUBCHEM")) {
621+
String pubchem = inRecord.get("PUBCHEM");
622+
if (pubchem.startsWith("SID:")) {
623+
String sid = pubchem.substring(4); // Extract the identifier after "SID:"
624+
cid = fetchCIDFromSID(sid);
625+
if (cid != null) {
626+
externalDBInfo = fetchExternalDBIdFromPubchemCID(cid);
627+
} else {
628+
return record.toString();
629+
}
630+
}
631+
}
632+
633+
// Rename keys
634+
Map<String, String> renamedExternalDBInfo = new HashMap<>();
635+
for (Entry<String, String> entry : externalDBInfo.entrySet()) {
636+
String key = entry.getKey();
637+
if (key.equals("ChEBI ID")) {
638+
key = "CHEBI";
639+
}
640+
if (key.equals("HMDB ID")) {
641+
key = "HMDB";
642+
}
643+
if (key.equals("KEGG ID")) {
644+
key = "KEGG";
645+
}
646+
if (key.equals("Lipid Maps ID (LM_ID)")) {
647+
key = "LIPIDMAPS";
648+
}
649+
if (key.equals("ChEMBL ID")) {
650+
key = "CHEMBL";
651+
}
652+
renamedExternalDBInfo.put(key, entry.getValue());
515653
}
516-
if (record.CH_LINK().get("CAS") != null) {
517-
inRecord.put("CAS", record.CH_LINK().get("CAS"));
654+
externalDBInfo = renamedExternalDBInfo;
655+
656+
if (inRecord.get("InChI") != null) {
657+
for (Entry<String, String> entry : inRecord.entrySet()) {
658+
String key = entry.getKey();
659+
if (externalDBInfo.containsKey(key)) {
660+
String inRecordValue = entry.getValue();
661+
String externalDBInfoValue = externalDBInfo.get(key);
662+
if (!inRecordValue.equals(externalDBInfoValue)) {
663+
System.out.println("Mismatch found for key: " + key);
664+
System.out.println("inRecord value: " + inRecordValue);
665+
System.out.println("externalDBInfo value: " + externalDBInfoValue);
666+
}
667+
}
668+
}
669+
}
670+
if (inRecord.get("InChI") != null && externalDBInfo.get("InChI") != null &&
671+
inRecord.get("InChI").substring(0, 14).equals(externalDBInfo.get("InChI").substring(0, 14))) {
672+
Map<String, String> chlink = record.CH_LINK();
673+
chlink.put("PUBCHEM", "CID:" + cid);
674+
} else {
675+
System.out.println("InChI does not match.");
676+
System.out.println("InChIKey in Record: " + inRecord.get("INCHIKEY"));
677+
System.out.println("InChIKey for compound in CID: " + externalDBInfo.get("INCHIKEY"));
678+
System.out.println("Rewrite SID to CID? (y/n/s): ");
679+
Scanner scanner = new Scanner(System.in);
680+
String response = scanner.nextLine();
681+
Map<String, String> chlink = record.CH_LINK();
682+
if (response.equalsIgnoreCase("y")) {
683+
chlink.put("PUBCHEM", "CID:" + cid);
684+
}
685+
// for (Entry<String, String> entry : chlink.entrySet()) {
686+
// String key = entry.getKey();
687+
// if (externalDBInfo.containsKey(key)) {
688+
// chlink.put(key, externalDBInfo.get(key));
689+
// }
690+
// }
691+
// record.CH_LINK(chlink);
692+
// } else if (response.equalsIgnoreCase("n")) {
693+
// chlink.put("PUBCHEM", "CID:" + cid);
694+
// } else {
695+
// return record.toString();
696+
// }
697+
// }
518698
}
519699

520-
System.out.println(inRecord);
521700

522-
System.out.println(fetchCIDFromSID("5689"));
523701

524702
return record.toString();
525703
}
@@ -542,11 +720,12 @@ public static void main(String[] arguments) throws Exception {
542720
}
543721

544722
RecordParser recordparser = new RecordParser(new HashSet<>());
545-
recordFiles.parallelStream()
723+
recordFiles.stream()
546724
.map(Validator::readFile)
547725
.filter(Objects::nonNull)
548726
.forEach(recordString -> {
549727
logger.info("Working on {}.", recordString.getKey());
728+
System.out.println("Working on " + recordString.getKey());
550729
Record record = parseRecord(recordString, recordparser);
551730
if (record == null || record.DEPRECATED()) return;
552731

@@ -561,6 +740,11 @@ public static void main(String[] arguments) throws Exception {
561740
//}
562741

563742
recordStringAfterMod = doNormalizeCompoundIdentifier(record);
743+
try {
744+
Thread.sleep(100);
745+
} catch (InterruptedException e) {
746+
throw new RuntimeException(e);
747+
}
564748

565749

566750
// if (doAddPubchemCid.get()) {
@@ -595,6 +779,7 @@ public static void main(String[] arguments) throws Exception {
595779
}
596780
}
597781
});
782+
System.out.println();
598783
}
599784

600785
private static CommandLine parseCommandLine(String[] arguments) {

0 commit comments

Comments
 (0)