Skip to content

Commit a3e7b4a

Browse files
committed
Finish project (2)
1 parent 35117f2 commit a3e7b4a

File tree

4 files changed

+253
-8
lines changed

4 files changed

+253
-8
lines changed

Main.java

Lines changed: 95 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,26 @@
11
package org.duangsuse.telegramscanner;
22

3+
import com.sun.org.apache.xml.internal.serialize.OutputFormat;
4+
import com.sun.org.apache.xml.internal.serialize.XMLSerializer;
5+
import com.sun.webkit.dom.HTMLDocumentImpl;
6+
import org.duangsuse.telegramscanner.helper.YamlDump;
37
import org.duangsuse.telegramscanner.model.Message;
48
import org.duangsuse.telegramscanner.scanner.Scanner;
59
import org.duangsuse.telegramscanner.scanner.Utf8LineInputStream;
10+
import org.duangsuse.telegramscanner.sourcemanager.Identifiable;
11+
import org.duangsuse.telegramscanner.sourcemanager.SourceManager;
12+
import org.w3c.dom.Document;
13+
import org.w3c.dom.Element;
614

15+
import javax.xml.parsers.DocumentBuilderFactory;
16+
import javax.xml.parsers.ParserConfigurationException;
17+
import java.beans.XMLEncoder;
718
import java.io.IOException;
819
import java.io.PrintStream;
9-
import java.util.Arrays;
20+
import java.io.StringWriter;
21+
import java.util.*;
1022

11-
import java.util.List;
23+
import java.util.logging.XMLFormatter;
1224

1325
/**
1426
* Application main class
@@ -42,12 +54,91 @@ private Main() {}
4254
public static void main(String... args) {
4355
err.print("TelegramScanner version "); err.println(VERSION);
4456
List<String> argList = Arrays.asList(args);
57+
HashSet<Message<String>> messageSet = new HashSet<>();
4558

46-
if (argList.contains("-test"))
59+
if (argList.contains("-test")) {
4760
testInput();
61+
System.exit(0);
62+
}
4863

4964
for (Message<String> stringMessage : new Scanner(System.in)) {
50-
out.print(stringMessage.toString());
65+
messageSet.add(stringMessage);
66+
}
67+
68+
if (argList.contains("-dump")) {
69+
//XMLEncoder coder = new XMLEncoder(out);
70+
71+
//coder.writeObject(YamlDump.getMessageMaps(messageSet));
72+
//coder.flush();
73+
74+
List<Map<String, Object>> maps = YamlDump.getMessageMaps(messageSet);
75+
76+
Document doc = null;
77+
try {
78+
doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
79+
} catch (ParserConfigurationException e) {
80+
e.printStackTrace();
81+
}
82+
83+
if (doc == null) System.exit(1);
84+
85+
Element list = doc.createElement("ol");
86+
for (Map<String, Object> map : maps) {
87+
Element child = doc.createElement("dl");
88+
89+
for (String key : map.keySet()) {
90+
Element messageObjectKey = doc.createElement("dt");
91+
Element messageObject = doc.createElement("dd");
92+
93+
messageObjectKey.setTextContent(key);
94+
messageObject.setTextContent(map.get(key).toString());
95+
96+
child.appendChild(messageObjectKey);
97+
child.appendChild(messageObject);
98+
}
99+
100+
list.appendChild(child);
101+
}
102+
103+
OutputFormat fmt = new OutputFormat(doc);
104+
105+
fmt.setIndenting(true);
106+
fmt.setIndent(4);
107+
fmt.setLineWidth(65);
108+
109+
StringWriter writer = new StringWriter();
110+
XMLSerializer serializer = new XMLSerializer(writer, fmt);
111+
112+
try {
113+
serializer.serialize(list);
114+
} catch (IOException e) {
115+
e.printStackTrace();
116+
}
117+
out.println(writer);
118+
119+
System.exit(0);
120+
}
121+
122+
if (argList.contains("-dump-yaml")) {
123+
YamlDump.dump(messageSet);
124+
System.exit(0);
125+
}
126+
127+
if (argList.contains("-dump-debug-yaml")) {
128+
YamlDump.dumpSourceManager(SourceManager.getInstance());
129+
System.exit(0);
130+
}
131+
132+
out.println(messageSet);
133+
134+
for (Identifiable key : SourceManager.getInstance().keySet()) {
135+
out.print(key); out.print(": ");
136+
out.println(SourceManager.getInstance().get(key));
137+
138+
for (Message<String> m: messageSet) {
139+
if (m.getIdentity() == key.getIdentity())
140+
out.println(m);
141+
}
51142
}
52143
}
53144

helper/YamlDump.java

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
package org.duangsuse.telegramscanner.helper;
2+
3+
import org.duangsuse.telegramscanner.model.Message;
4+
import org.duangsuse.telegramscanner.sourcemanager.Identifiable;
5+
import org.duangsuse.telegramscanner.sourcemanager.SimpleMapDelegate;
6+
import org.duangsuse.telegramscanner.sourcemanager.SourceLocation;
7+
import org.duangsuse.telegramscanner.sourcemanager.SourceManager;
8+
import org.jetbrains.annotations.NotNull;
9+
import org.snakeyaml.engine.v1.api.Dump;
10+
import org.snakeyaml.engine.v1.api.DumpSettings;
11+
import org.snakeyaml.engine.v1.api.DumpSettingsBuilder;
12+
import org.snakeyaml.engine.v1.common.ScalarStyle;
13+
14+
import java.util.*;
15+
16+
/**
17+
* Yaml dump for messages
18+
*
19+
* @see org.duangsuse.telegramscanner.model.Message
20+
*/
21+
public final class YamlDump {
22+
/**
23+
* Dump messages method
24+
*/
25+
public static void dump(Set<Message<String>> msgs) {
26+
DumpSettings settings = new DumpSettingsBuilder()
27+
.setIndent(2)
28+
.setCanonical(true)
29+
.setSplitLines(false)
30+
.setDefaultScalarStyle(ScalarStyle.DOUBLE_QUOTED)
31+
.build();
32+
33+
Dump d = new Dump(settings);
34+
35+
List<Map<String, Object>> listMsgs = getMessageMaps(msgs);
36+
37+
System.out.print(d.dumpToString(listMsgs));
38+
}
39+
40+
/**
41+
* Convert a message set to standard collection data set
42+
* <br>
43+
* no deep copy for {@link SourceManager} required, debug information is inlined into output map
44+
*
45+
* @param msgs messages to convert
46+
* @return standard (list, map, int, string) representation of message set
47+
*/
48+
@NotNull
49+
public static List<Map<String, Object>> getMessageMaps(@NotNull Set<Message<String>> msgs) {
50+
Map<Integer, Map<String, Integer>> debugs = getIntegerDebugMap(SourceManager.getInstance());
51+
52+
List<Map<String, Object>> listMsgs = new LinkedList<>();
53+
54+
for (Message<String> m : msgs) {
55+
Map<String, Object> yamlObject = new HashMap<>();
56+
57+
yamlObject.put("header_type", m.getHeaderType().name());
58+
yamlObject.put("name", m.getHeader().getSourceName());
59+
yamlObject.put("published", m.getHeader().getPublishedAt().getTime());
60+
yamlObject.put("ext", m.getMessageExtRef());
61+
yamlObject.put("body_type", m.getBodyType().name());
62+
yamlObject.put("body", m.getMessageBody());
63+
yamlObject.put("links", m.getLinks());
64+
yamlObject.put("hashtags", m.getHashtags());
65+
66+
if (debugs.containsKey(m.getIdentity()))
67+
yamlObject.put("debug", debugs.get(m.getIdentity()));
68+
69+
listMsgs.add(yamlObject);
70+
}
71+
return listMsgs;
72+
}
73+
74+
/**
75+
* Dump source manager
76+
*/
77+
public static void dumpSourceManager(@NotNull SimpleMapDelegate<Identifiable, SourceLocation> debug) {
78+
Dump d = new Dump(new DumpSettingsBuilder().setIndent(2).setSplitLines(true).build());
79+
80+
Map<Integer, Map<String, Integer>> repr = getIntegerDebugMap(debug);
81+
82+
System.out.println(d.dumpToString(repr));
83+
}
84+
85+
/**
86+
* Get json-map style debug information
87+
*
88+
* @param debug source manager map
89+
* @return map and sub-map representation of {@link org.duangsuse.telegramscanner.sourcemanager.SourceManager}
90+
*/
91+
@NotNull
92+
private static Map<Integer, Map<String, Integer>> getIntegerDebugMap(@NotNull SimpleMapDelegate<Identifiable, SourceLocation> debug) {
93+
Map<Integer, Map<String, Integer>> repr = new HashMap<>();
94+
95+
for (Identifiable key : debug.keySet()) {
96+
SourceLocation value = debug.get(key);
97+
98+
Map<String, Integer> obj = new HashMap<>();
99+
100+
obj.put("message_no", value.getMessageNo());
101+
obj.put("message_line", value.getMessageLine());
102+
103+
obj.put("offset", value.getOffset());
104+
obj.put("line", value.getLine());
105+
106+
repr.put(key.getIdentity(), obj);
107+
}
108+
return repr;
109+
}
110+
}

scanner/Scanner.java

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
import org.duangsuse.telegramscanner.Main;
44
import org.duangsuse.telegramscanner.helper.Strings;
55
import org.duangsuse.telegramscanner.model.*;
6+
import org.duangsuse.telegramscanner.sourcemanager.Identifiable;
7+
import org.duangsuse.telegramscanner.sourcemanager.SourceLocation;
8+
import org.duangsuse.telegramscanner.sourcemanager.SourceManager;
69
import org.jetbrains.annotations.NotNull;
710

811
import java.io.IOException;
@@ -81,6 +84,7 @@ public Message<String> next() {
8184
//now message header is in lastLine
8285

8386
// set tgName and tgHeader
87+
if (lastLine == null) return null; // bad practice
8488
Matcher m = RegexConstants.MESSAGE_HEAD.matcher(lastLine);
8589
m.reset();
8690
assert m.matches(): "Checked lastLine should be matched regex pattern";
@@ -161,6 +165,7 @@ public Message<String> next() {
161165
$extRef = String.valueOf("");
162166

163167
scannerInfo("Break; Scanning message body, " + $lastHeadType + "~" + $lastHead.toString() + ", E:" + $extRef);
168+
markSourceObject($lastHead);
164169
state = ScannerState.SCAN_BODY;
165170

166171
/* fall through */
@@ -179,7 +184,7 @@ public Message<String> next() {
179184
matchTextPart(bareMatcher, $bareLinks, 1, 2);
180185
matchTextPart(markdownMatcher, $links, 1, 2);
181186

182-
bodyBuffer.append(line);
187+
bodyBuffer.append(line).append(System.lineSeparator());
183188
});
184189
keepLineOnce = true; // keep message head line
185190

@@ -216,32 +221,68 @@ public Message<String> next() {
216221

217222
++messageNo;
218223
localLine = 0;
224+
markSourceObject(lastMessage);
219225
return lastMessage;
220226
}
221227
};
222228
}
223229

230+
/**
231+
* Fetch current scanner source location
232+
*
233+
* @return current scanning location
234+
*/
235+
public SourceLocation getCurrentSourceLocation() {
236+
return new SourceLocation(offset, line, messageNo, localLine);
237+
}
238+
239+
/**
240+
* Mark source object to global {@link org.duangsuse.telegramscanner.sourcemanager.SourceManager}
241+
*
242+
* @see SourceManager#getInstance() instance pool
243+
* @param sourceObj identifiable object to be added with current scanner position
244+
*/
245+
public void markSourceObject(Identifiable sourceObj) {
246+
SourceManager.getInstance().put(sourceObj, getCurrentSourceLocation());
247+
}
248+
224249
/**
225250
* Match text part using {@link Matcher}, collecting groups
226251
*
227252
* @param matcher text matcher
228253
* @param dst destination collection
254+
* @param separator string join separator
229255
* @param groups to be collected (and concatenated)
230256
*/
231-
private void matchTextPart(@NotNull Matcher matcher, Collection<String> dst, int... groups) {
257+
private void matchTextPart(@NotNull Matcher matcher, Collection<String> dst, String separator, int... groups) {
232258
//if (matcher.matches())
233259
// for (int i = 1; i < matcher.groupCount(); i++)
234260
// dst.add(matcher.group(i).trim());
235261

236262
while (matcher.find()) {
237263
StringBuilder sb = new StringBuilder();
264+
238265
for (int i: groups) {
239-
sb.append(matcher.group(i));
266+
sb.append(matcher.group(i)).append(separator);
240267
}
268+
269+
sb.delete(sb.length() - separator.length(), sb.length());
270+
241271
dst.add(sb.toString());
242272
}
243273
}
244274

275+
/**
276+
* Match text part using {@link Matcher}, collecting groups, using "://" as separator
277+
*
278+
* @param matcher text matcher
279+
* @param dst destination collection
280+
* @param groups to be collected (and concatenated)
281+
*/
282+
private void matchTextPart(@NotNull Matcher matcher, Collection<String> dst, int... groups) {
283+
matchTextPart(matcher, dst, "://", groups);
284+
}
285+
245286
/**
246287
* messageBody toString preview length
247288
*/

sourcemanager/SourceManager.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package org.duangsuse.telegramscanner.sourcemanager;
22

3+
import org.jetbrains.annotations.Contract;
4+
35
import java.util.HashMap;
46

57
/**
@@ -21,5 +23,6 @@ private static final class LazyHolder {
2123
static { INSTANCE = new SourceManager(); }
2224
}
2325

24-
public SourceManager getInstance() { return LazyHolder.INSTANCE; }
26+
@Contract(pure = true)
27+
public static SourceManager getInstance() { return LazyHolder.INSTANCE; }
2528
}

0 commit comments

Comments
 (0)