@@ -475,11 +475,17 @@ else if (ret == InchiStatus.ERROR) {
475
475
}
476
476
477
477
public static String fetchCIDFromSID (String sid ) {
478
- String apiUrl = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/substance/sid/" + sid + "/cids/ JSON" ;
478
+ String apiUrl = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/substance/sid/" + sid + "/JSON" ;
479
479
try {
480
480
HttpURLConnection connection = (HttpURLConnection ) new URL (apiUrl ).openConnection ();
481
481
connection .setRequestMethod ("GET" );
482
482
483
+ int responseCode = connection .getResponseCode ();
484
+ if (responseCode != 200 ) {
485
+ System .out .println ("Error fetching CID: Server returned HTTP response code " + responseCode );
486
+ return null ;
487
+ }
488
+
483
489
Scanner scanner = new Scanner (connection .getInputStream ());
484
490
StringBuilder response = new StringBuilder ();
485
491
while (scanner .hasNext ()) {
@@ -488,38 +494,210 @@ public static String fetchCIDFromSID(String sid) {
488
494
scanner .close ();
489
495
490
496
JsonObject jsonResponse = JsonParser .parseString (response .toString ()).getAsJsonObject ();
491
- JsonArray cid = jsonResponse .getAsJsonObject ("InformationList" )
492
- .getAsJsonArray ("Information" )
493
- .get (0 ).getAsJsonObject ()
494
- .getAsJsonArray ("CID" );
495
-
496
- if (cid .size () != 1 ) {
497
- System .out .println ("Error: More than one CID found for SID " + sid );
498
- return null ;
499
- }
500
-
501
- return cid .get (0 ).getAsString ();
497
+ JsonObject information = jsonResponse .getAsJsonArray ("PC_Substances" )
498
+ .get (0 ).getAsJsonObject ();
499
+
500
+ JsonArray compoundArray = information .getAsJsonArray ("compound" );
501
+ if (compoundArray .size () > 1 ) {
502
+ String cid = information .getAsJsonArray ("compound" )
503
+ .get (1 ).getAsJsonObject ()
504
+ .getAsJsonObject ("id" )
505
+ .getAsJsonObject ("id" )
506
+ .getAsJsonPrimitive ("cid" ).getAsString ();
507
+
508
+ String sourceName = information .getAsJsonObject ("source" )
509
+ .getAsJsonObject ("db" )
510
+ .getAsJsonPrimitive ("name" ).getAsString ();
511
+
512
+ String sourceID = information .getAsJsonObject ("source" )
513
+ .getAsJsonObject ("db" )
514
+ .getAsJsonObject ("source_id" )
515
+ .getAsJsonPrimitive ("str" ).getAsString ();
516
+ return cid ;
517
+ } else {
518
+ System .out .println ("No valid CID found in the response." );
519
+ return null ;
520
+ }
502
521
} catch (IOException e ) {
503
522
e .printStackTrace ();
504
523
System .out .println ("Error fetching CID" );
505
524
return null ;
506
525
}
507
526
}
508
527
528
+ public static Map <String , String > fetchExternalDBIdFromPubchemCID (String cid ) {
529
+ String apiUrl = "https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/" + cid + "/JSON" ;
530
+ Map <String , String > externalDatabaseInfo = new HashMap <>();
531
+ try {
532
+ HttpURLConnection connection = (HttpURLConnection ) new URL (apiUrl ).openConnection ();
533
+ connection .setRequestMethod ("GET" );
534
+
535
+ Scanner scanner = new Scanner (connection .getInputStream ());
536
+ StringBuilder response = new StringBuilder ();
537
+ while (scanner .hasNext ()) {
538
+ response .append (scanner .nextLine ());
539
+ }
540
+ scanner .close ();
541
+
542
+ JsonObject jsonResponse = JsonParser .parseString (response .toString ()).getAsJsonObject ();
543
+ JsonArray sections = jsonResponse .getAsJsonObject ("Record" ).getAsJsonArray ("Section" );
544
+
545
+ for (int i = 0 ; i < sections .size (); i ++) {
546
+ JsonObject section = sections .get (i ).getAsJsonObject ();
547
+ if (section .get ("TOCHeading" ).getAsString ().equals ("Names and Identifiers" )) {
548
+ JsonArray subSections = section .getAsJsonArray ("Section" );
549
+ for (int j = 0 ; j < subSections .size (); j ++) {
550
+ JsonObject subSection = subSections .get (j ).getAsJsonObject ();
551
+ if (subSection .get ("TOCHeading" ).getAsString ().equals ("Other Identifiers" )) {
552
+ JsonArray identifiers = subSection .getAsJsonArray ("Section" );
553
+ for (int k = 0 ; k < identifiers .size (); k ++) {
554
+ JsonObject identifier = identifiers .get (k ).getAsJsonObject ();
555
+ String dbName = identifier .get ("TOCHeading" ).getAsString ();
556
+ JsonArray information = identifier .getAsJsonArray ("Information" );
557
+ for (int l = 0 ; l < information .size (); l ++) {
558
+ JsonObject info = information .get (l ).getAsJsonObject ();
559
+ if (info .has ("Value" )) {
560
+ JsonArray value = info .getAsJsonObject ("Value" ).getAsJsonArray ("StringWithMarkup" );
561
+ for (int m = 0 ; m < value .size (); m ++) {
562
+ JsonObject dbInfo = value .get (m ).getAsJsonObject ();
563
+ String dbValue = dbInfo .get ("String" ).getAsString ();
564
+ externalDatabaseInfo .put (dbName , dbValue );
565
+ }
566
+ }
567
+ }
568
+ }
569
+ } else if (subSection .get ("TOCHeading" ).getAsString ().equals ("Computed Descriptors" )) {
570
+ JsonArray identifiers = subSection .getAsJsonArray ("Section" );
571
+ for (int k = 0 ; k < identifiers .size (); k ++) {
572
+ JsonObject identifier = identifiers .get (k ).getAsJsonObject ();
573
+ String dbName = identifier .get ("TOCHeading" ).getAsString ();
574
+ if (dbName .equals ("InChI" )) {
575
+ JsonArray information = identifier .getAsJsonArray ("Information" );
576
+ for (int l = 0 ; l < information .size (); l ++) {
577
+ JsonObject info = information .get (l ).getAsJsonObject ();
578
+ if (info .has ("Value" )) {
579
+ String inchi = info .getAsJsonObject ("Value" ).getAsJsonArray ("StringWithMarkup" )
580
+ .get (0 ).getAsJsonObject ().get ("String" ).getAsString ();
581
+ externalDatabaseInfo .put ("InChI" , inchi );
582
+ }
583
+ }
584
+ }
585
+ if (dbName .equals ("InChIKey" )) {
586
+ JsonArray information = identifier .getAsJsonArray ("Information" );
587
+ for (int l = 0 ; l < information .size (); l ++) {
588
+ JsonObject info = information .get (l ).getAsJsonObject ();
589
+ if (info .has ("Value" )) {
590
+ String inchi = info .getAsJsonObject ("Value" ).getAsJsonArray ("StringWithMarkup" )
591
+ .get (0 ).getAsJsonObject ().get ("String" ).getAsString ();
592
+ externalDatabaseInfo .put ("INCHIKEY" , inchi );
593
+ }
594
+ }
595
+ }
596
+ }
597
+ }
598
+ }
599
+ }
600
+ }
601
+ } catch (IOException e ) {
602
+ e .printStackTrace ();
603
+ System .out .println ("Error fetching info for CID" );
604
+ return new HashMap <>();
605
+ }
606
+ return externalDatabaseInfo ;
607
+ }
608
+
609
+
509
610
public static String doNormalizeCompoundIdentifier (Record record ) {
510
611
Map <String , String > inRecord = new HashMap <>();
511
- inRecord .put ("Inchi " , record .CH_IUPAC ());
612
+ inRecord .put ("InChI " , record .CH_IUPAC ());
512
613
inRecord .put ("SMILES" , record .CH_SMILES ());
513
- if (record .CH_LINK ().get ("PUBCHEM" ) != null ) {
514
- inRecord .put ("PUBCHEM" , record .CH_LINK ().get ("PUBCHEM" ));
614
+ if (record .CH_LINK () != null ) {
615
+ inRecord .putAll (record .CH_LINK ());
616
+ }
617
+
618
+ Map <String , String > externalDBInfo = new HashMap <>();
619
+ String cid = "" ;
620
+ if (inRecord .containsKey ("PUBCHEM" )) {
621
+ String pubchem = inRecord .get ("PUBCHEM" );
622
+ if (pubchem .startsWith ("SID:" )) {
623
+ String sid = pubchem .substring (4 ); // Extract the identifier after "SID:"
624
+ cid = fetchCIDFromSID (sid );
625
+ if (cid != null ) {
626
+ externalDBInfo = fetchExternalDBIdFromPubchemCID (cid );
627
+ } else {
628
+ return record .toString ();
629
+ }
630
+ }
631
+ }
632
+
633
+ // Rename keys
634
+ Map <String , String > renamedExternalDBInfo = new HashMap <>();
635
+ for (Entry <String , String > entry : externalDBInfo .entrySet ()) {
636
+ String key = entry .getKey ();
637
+ if (key .equals ("ChEBI ID" )) {
638
+ key = "CHEBI" ;
639
+ }
640
+ if (key .equals ("HMDB ID" )) {
641
+ key = "HMDB" ;
642
+ }
643
+ if (key .equals ("KEGG ID" )) {
644
+ key = "KEGG" ;
645
+ }
646
+ if (key .equals ("Lipid Maps ID (LM_ID)" )) {
647
+ key = "LIPIDMAPS" ;
648
+ }
649
+ if (key .equals ("ChEMBL ID" )) {
650
+ key = "CHEMBL" ;
651
+ }
652
+ renamedExternalDBInfo .put (key , entry .getValue ());
515
653
}
516
- if (record .CH_LINK ().get ("CAS" ) != null ) {
517
- inRecord .put ("CAS" , record .CH_LINK ().get ("CAS" ));
654
+ externalDBInfo = renamedExternalDBInfo ;
655
+
656
+ if (inRecord .get ("InChI" ) != null ) {
657
+ for (Entry <String , String > entry : inRecord .entrySet ()) {
658
+ String key = entry .getKey ();
659
+ if (externalDBInfo .containsKey (key )) {
660
+ String inRecordValue = entry .getValue ();
661
+ String externalDBInfoValue = externalDBInfo .get (key );
662
+ if (!inRecordValue .equals (externalDBInfoValue )) {
663
+ System .out .println ("Mismatch found for key: " + key );
664
+ System .out .println ("inRecord value: " + inRecordValue );
665
+ System .out .println ("externalDBInfo value: " + externalDBInfoValue );
666
+ }
667
+ }
668
+ }
669
+ }
670
+ if (inRecord .get ("InChI" ) != null && externalDBInfo .get ("InChI" ) != null &&
671
+ inRecord .get ("InChI" ).substring (0 , 14 ).equals (externalDBInfo .get ("InChI" ).substring (0 , 14 ))) {
672
+ Map <String , String > chlink = record .CH_LINK ();
673
+ chlink .put ("PUBCHEM" , "CID:" + cid );
674
+ } else {
675
+ System .out .println ("InChI does not match." );
676
+ System .out .println ("InChIKey in Record: " + inRecord .get ("INCHIKEY" ));
677
+ System .out .println ("InChIKey for compound in CID: " + externalDBInfo .get ("INCHIKEY" ));
678
+ System .out .println ("Rewrite SID to CID? (y/n/s): " );
679
+ Scanner scanner = new Scanner (System .in );
680
+ String response = scanner .nextLine ();
681
+ Map <String , String > chlink = record .CH_LINK ();
682
+ if (response .equalsIgnoreCase ("y" )) {
683
+ chlink .put ("PUBCHEM" , "CID:" + cid );
684
+ }
685
+ // for (Entry<String, String> entry : chlink.entrySet()) {
686
+ // String key = entry.getKey();
687
+ // if (externalDBInfo.containsKey(key)) {
688
+ // chlink.put(key, externalDBInfo.get(key));
689
+ // }
690
+ // }
691
+ // record.CH_LINK(chlink);
692
+ // } else if (response.equalsIgnoreCase("n")) {
693
+ // chlink.put("PUBCHEM", "CID:" + cid);
694
+ // } else {
695
+ // return record.toString();
696
+ // }
697
+ // }
518
698
}
519
699
520
- System .out .println (inRecord );
521
700
522
- System .out .println (fetchCIDFromSID ("5689" ));
523
701
524
702
return record .toString ();
525
703
}
@@ -542,11 +720,12 @@ public static void main(String[] arguments) throws Exception {
542
720
}
543
721
544
722
RecordParser recordparser = new RecordParser (new HashSet <>());
545
- recordFiles .parallelStream ()
723
+ recordFiles .stream ()
546
724
.map (Validator ::readFile )
547
725
.filter (Objects ::nonNull )
548
726
.forEach (recordString -> {
549
727
logger .info ("Working on {}." , recordString .getKey ());
728
+ System .out .println ("Working on " + recordString .getKey ());
550
729
Record record = parseRecord (recordString , recordparser );
551
730
if (record == null || record .DEPRECATED ()) return ;
552
731
@@ -561,6 +740,11 @@ public static void main(String[] arguments) throws Exception {
561
740
//}
562
741
563
742
recordStringAfterMod = doNormalizeCompoundIdentifier (record );
743
+ try {
744
+ Thread .sleep (100 );
745
+ } catch (InterruptedException e ) {
746
+ throw new RuntimeException (e );
747
+ }
564
748
565
749
566
750
// if (doAddPubchemCid.get()) {
@@ -595,6 +779,7 @@ public static void main(String[] arguments) throws Exception {
595
779
}
596
780
}
597
781
});
782
+ System .out .println ();
598
783
}
599
784
600
785
private static CommandLine parseCommandLine (String [] arguments ) {
0 commit comments