|
7 | 7 | import java.net.URL;
|
8 | 8 | import java.net.URLConnection;
|
9 | 9 | import java.net.URLEncoder;
|
10 |
| -import java.nio.charset.Charset; |
11 | 10 | import java.nio.charset.StandardCharsets;
|
12 | 11 | import java.nio.file.Path;
|
13 | 12 | import java.util.*;
|
|
17 | 16 | import java.util.regex.Matcher;
|
18 | 17 | import java.util.regex.Pattern;
|
19 | 18 |
|
| 19 | +import com.github.difflib.DiffUtils; |
| 20 | +import com.github.difflib.patch.AbstractDelta; |
| 21 | +import com.github.difflib.patch.Patch; |
| 22 | +import com.google.gson.*; |
20 | 23 | import massbank.ProjectPropertiesLoader;
|
21 | 24 | import massbank.RecordParser;
|
22 | 25 | import org.apache.commons.cli.CommandLine;
|
|
39 | 42 | import org.openscience.cdk.smiles.SmiFlavor;
|
40 | 43 | import org.openscience.cdk.smiles.SmilesGenerator;
|
41 | 44 |
|
42 |
| -import com.google.gson.Gson; |
43 |
| -import com.google.gson.GsonBuilder; |
44 |
| -import com.google.gson.JsonObject; |
45 |
| -import com.google.gson.JsonSyntaxException; |
46 |
| - |
47 | 45 | import de.undercouch.citeproc.CSL;
|
48 | 46 | import de.undercouch.citeproc.bibtex.BibTeXConverter;
|
49 | 47 | import de.undercouch.citeproc.bibtex.BibTeXItemDataProvider;
|
@@ -475,6 +473,56 @@ else if (ret == InchiStatus.ERROR) {
|
475 | 473 | }
|
476 | 474 | return record.toString();
|
477 | 475 | }
|
| 476 | + |
| 477 | + public static String fetchCIDFromSID(String sid) { |
| 478 | + String apiUrl = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/substance/sid/" + sid + "/cids/JSON"; |
| 479 | + try { |
| 480 | + HttpURLConnection connection = (HttpURLConnection) new URL(apiUrl).openConnection(); |
| 481 | + connection.setRequestMethod("GET"); |
| 482 | + |
| 483 | + Scanner scanner = new Scanner(connection.getInputStream()); |
| 484 | + StringBuilder response = new StringBuilder(); |
| 485 | + while (scanner.hasNext()) { |
| 486 | + response.append(scanner.nextLine()); |
| 487 | + } |
| 488 | + scanner.close(); |
| 489 | + |
| 490 | + JsonObject jsonResponse = JsonParser.parseString(response.toString()).getAsJsonObject(); |
| 491 | + JsonArray cid = jsonResponse.getAsJsonObject("InformationList") |
| 492 | + .getAsJsonArray("Information") |
| 493 | + .get(0).getAsJsonObject() |
| 494 | + .getAsJsonArray("CID"); |
| 495 | + |
| 496 | + if (cid.size() != 1) { |
| 497 | + System.out.println("Error: More than one CID found for SID " + sid); |
| 498 | + return null; |
| 499 | + } |
| 500 | + |
| 501 | + return cid.get(0).getAsString(); |
| 502 | + } catch (IOException e) { |
| 503 | + e.printStackTrace(); |
| 504 | + System.out.println("Error fetching CID"); |
| 505 | + return null; |
| 506 | + } |
| 507 | + } |
| 508 | + |
| 509 | + public static String doNormalizeCompoundIdentifier(Record record) { |
| 510 | + Map<String, String> inRecord = new HashMap<>(); |
| 511 | + inRecord.put("Inchi", record.CH_IUPAC()); |
| 512 | + inRecord.put("SMILES", record.CH_SMILES()); |
| 513 | + if (record.CH_LINK().get("PUBCHEM") != null) { |
| 514 | + inRecord.put("PUBCHEM", record.CH_LINK().get("PUBCHEM")); |
| 515 | + } |
| 516 | + if (record.CH_LINK().get("CAS") != null) { |
| 517 | + inRecord.put("CAS", record.CH_LINK().get("CAS")); |
| 518 | + } |
| 519 | + |
| 520 | + System.out.println(inRecord); |
| 521 | + |
| 522 | + System.out.println(fetchCIDFromSID("5689")); |
| 523 | + |
| 524 | + return record.toString(); |
| 525 | + } |
478 | 526 |
|
479 | 527 |
|
480 | 528 | public static void main(String[] arguments) throws Exception {
|
@@ -512,12 +560,23 @@ public static void main(String[] arguments) throws Exception {
|
512 | 560 | // recordstring2=doAddInchikey(record);
|
513 | 561 | //}
|
514 | 562 |
|
| 563 | + recordStringAfterMod = doNormalizeCompoundIdentifier(record); |
515 | 564 |
|
516 |
| - if (doAddPubchemCid.get()) { |
517 |
| - recordStringAfterMod=doAddPubchemCID(record); |
518 |
| - } |
519 |
| - if (doSetSMILESfromInChi.get()) { |
520 |
| - recordStringAfterMod=doSetSMILESfromInChi(record); |
| 565 | + |
| 566 | +// if (doAddPubchemCid.get()) { |
| 567 | +// recordStringAfterMod=doAddPubchemCID(record); |
| 568 | +// } |
| 569 | +// if (doSetSMILESfromInChi.get()) { |
| 570 | +// recordStringAfterMod=doSetSMILESfromInChi(record); |
| 571 | +// } |
| 572 | + |
| 573 | + List<String> originalList = Arrays.asList(recordString.getValue().split("\\n")); |
| 574 | + List<String> revisedList = Arrays.asList(recordStringAfterMod.split("\\n")); |
| 575 | + |
| 576 | + Patch<String> patch = DiffUtils.diff(originalList, revisedList); |
| 577 | + |
| 578 | + for (AbstractDelta<String> delta : patch.getDeltas()) { |
| 579 | + System.out.println(delta); |
521 | 580 | }
|
522 | 581 |
|
523 | 582 | if (!recordString.getValue().equals(recordStringAfterMod)) {
|
|
0 commit comments