Skip to content

Commit aa8763f

Browse files
author
Drew Kerrigan
committed
Updated StanfordCoreNLPClient so that it won't fall back to local processing. Fixed a problem preventing updates to processor config properties. Simplified StanfordCoreNLPService.
1 parent f244588 commit aa8763f

File tree

8 files changed

+906
-135
lines changed

8 files changed

+906
-135
lines changed

nifi-stanfordcorenlp-nar/pom.xml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@
1919
<parent>
2020
<groupId>com.iss.nifi</groupId>
2121
<artifactId>nifi-stanfordcorenlp-processor</artifactId>
22-
<version>1.0</version>
22+
<version>1.1</version>
2323
</parent>
2424

2525
<artifactId>nifi-stanfordcorenlp-nar</artifactId>
26-
<version>1.0</version>
26+
<version>1.1</version>
2727
<packaging>nar</packaging>
2828
<properties>
2929
<maven.javadoc.skip>true</maven.javadoc.skip>
@@ -34,7 +34,7 @@
3434
<dependency>
3535
<groupId>com.iss.nifi</groupId>
3636
<artifactId>nifi-stanfordcorenlp-processors</artifactId>
37-
<version>1.0</version>
37+
<version>1.1</version>
3838
</dependency>
3939
</dependencies>
4040

nifi-stanfordcorenlp-processors/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
<parent>
2020
<groupId>com.iss.nifi</groupId>
2121
<artifactId>nifi-stanfordcorenlp-processor</artifactId>
22-
<version>1.0</version>
22+
<version>1.1</version>
2323
</parent>
2424

2525
<artifactId>nifi-stanfordcorenlp-processors</artifactId>

nifi-stanfordcorenlp-processors/src/main/java/com/iss/nifi/processors/stanfordcorenlp/StanfordCoreNLPProcessor.java

Lines changed: 63 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
*
33
* MIT License
44
*
5-
* Copyright (c) 2019 Institutional Shareholder Services. All other rights reserved.
5+
* Copyright (c) 2020 Institutional Shareholder Services. All other rights reserved.
66
*
77
* Permission is hereby granted, free of charge, to any person obtaining a copy
88
* of this software and associated documentation files (the "Software"), to deal
@@ -66,6 +66,8 @@
6666
import org.apache.nifi.processor.io.OutputStreamCallback;
6767
import org.apache.nifi.processor.util.StandardValidators;
6868

69+
import edu.stanford.nlp.pipeline.AnnotationPipeline;
70+
6971
@Tags({ "Stanford", "CoreNLP" })
7072
@CapabilityDescription("Stanford CoreNLP Processor")
7173
@SeeAlso({})
@@ -157,7 +159,8 @@ public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
157159

158160
@OnScheduled
159161
public void onScheduled(final ProcessContext context) throws Exception {
160-
ensureService(context);
162+
getLogger().debug("OnScheduled called for StanfordCoreNLPProcessor, refreshing StanfordCoreNLPService");
163+
service = new StanfordCoreNLPService(createPipeline(context));
161164
}
162165

163166
@Override
@@ -169,22 +172,22 @@ public void onTrigger(final ProcessContext context, final ProcessSession session
169172
flowFile = session.create();
170173
}
171174

172-
String flowFileText = getTextFromSession(session, flowFile);
175+
final String flowFileText = getTextFromSession(session, flowFile);
173176

174177
if (flowFileText == null || flowFileText.isEmpty()) {
175178
getLogger().error("Empty flow file cannot be analyzed");
176179
session.transfer(flowFile, FAILURE_RELATIONSHIP);
177180
return;
178181
}
179182

180-
String jsonPath = context.getProperty(PATH_ATTR).evaluateAttributeExpressions(flowFile).getValue();
181-
String entityTypes = context.getProperty(ENTITIES_ATTR).evaluateAttributeExpressions(flowFile).getValue();
182-
String text = getTextFromJson(flowFileText, jsonPath);
183+
final String jsonPath = context.getProperty(PATH_ATTR).evaluateAttributeExpressions(flowFile).getValue();
184+
final String entityTypes = context.getProperty(ENTITIES_ATTR).evaluateAttributeExpressions(flowFile).getValue();
185+
final String text = getTextFromJson(flowFileText, jsonPath);
183186
Map<String, List<String>> entityMap;
184187

185188
try {
186189
entityMap = service.extractEntities(text, entityTypes);
187-
} catch (Exception e) {
190+
} catch (final Exception e) {
188191
e.printStackTrace();
189192
getLogger().error("Failed to analyze flow file text");
190193
session.transfer(flowFile, FAILURE_RELATIONSHIP);
@@ -193,136 +196,142 @@ public void onTrigger(final ProcessContext context, final ProcessSession session
193196

194197
Map<String, Object> flowFileJsonMap;
195198

196-
Gson gson = new Gson();
199+
final Gson gson = new Gson();
197200
try {
198201
flowFileJsonMap = gson.fromJson(flowFileText, Map.class);
199-
} catch (JsonSyntaxException e) {
202+
} catch (final JsonSyntaxException e) {
200203
e.printStackTrace();
201204
getLogger().warn("Failed to parse flow file text as json, writing new flow file from blank json document");
202205
flowFileJsonMap = new HashMap<String, Object>();
203206
}
204207

205208
try {
206-
for (String k : entityMap.keySet()) {
209+
for (final String k : entityMap.keySet()) {
207210
flowFileJsonMap.put(k, entityMap.get(k));
208211
}
209212

210-
String entityJson = gson.toJson(entityMap);
211-
String finalJson = gson.toJson(flowFileJsonMap);
213+
final String entityJson = gson.toJson(entityMap);
214+
final String finalJson = gson.toJson(flowFileJsonMap);
212215

213216
flowFile = session.putAttribute(flowFile, OUTPUT_ATTR, entityJson);
214217
flowFile = session.write(flowFile, new OutputStreamCallback() {
215218
@Override
216-
public void process(OutputStream out) throws IOException {
219+
public void process(final OutputStream out) throws IOException {
217220
out.write(finalJson.getBytes());
218221
}
219222
});
220223

221224
session.transfer(flowFile, SUCCESS_RELATIONSHIP);
222225
return;
223-
} catch (Exception e) {
226+
} catch (final Exception e) {
224227
e.printStackTrace();
225228
getLogger().warn("Failed to generate flow file or attributes");
226229
}
227230

228231
session.transfer(flowFile, FAILURE_RELATIONSHIP);
229232
}
230233

231-
private String getTextFromSession(final ProcessSession session, FlowFile flowFile) {
234+
private String getTextFromSession(final ProcessSession session, final FlowFile flowFile) {
232235
final AtomicReference<String> atomicText = new AtomicReference<>();
233236

234237
session.read(flowFile, new InputStreamCallback() {
235238
@Override
236-
public void process(InputStream in) throws IOException {
239+
public void process(final InputStream in) throws IOException {
237240
try {
238-
String rawText = IOUtils.toString(in);
241+
final String rawText = IOUtils.toString(in);
239242
atomicText.set(rawText);
240-
} catch (NullPointerException e) {
243+
} catch (final NullPointerException e) {
241244
e.printStackTrace();
242245
getLogger().warn("FlowFile text was null");
243-
} catch (IOException e) {
246+
} catch (final IOException e) {
244247
e.printStackTrace();
245248
getLogger().error("FlowFile text could not be read due to IOException");
246249
}
247250
}
248251
});
249252

250-
String text = atomicText.get();
253+
final String text = atomicText.get();
251254
if (text == null || text.isEmpty()) {
252255
return null;
253256
}
254257

255258
return text;
256259
}
257260

258-
private String getTextFromJson(String flowFileText, String jsonPath) {
261+
private String getTextFromJson(final String flowFileText, final String jsonPath) {
259262
if (jsonPath == null || jsonPath.isEmpty()) {
260263
return flowFileText;
261264
}
262265

263266
try {
264-
Configuration conf = Configuration.builder().options(Option.ALWAYS_RETURN_LIST).build();
265-
List<String> result = JsonPath.using(conf).parse(flowFileText).read(jsonPath);
266-
String combined = String.join(" ", result);
267-
getLogger().info("Extracted this text from the flow file with the configured json path: " + combined);
267+
final Configuration conf = Configuration.builder().options(Option.ALWAYS_RETURN_LIST).build();
268+
final List<String> result = JsonPath.using(conf).parse(flowFileText).read(jsonPath);
269+
final String combined = String.join(" ", result);
268270
return combined;
269-
} catch (ClassCastException e) {
270-
LinkedHashMap<String, Object> resultMap = JsonPath.read(flowFileText, jsonPath);
271+
} catch (final ClassCastException e) {
272+
final LinkedHashMap<String, Object> resultMap = JsonPath.read(flowFileText, jsonPath);
271273
String combined = "";
272-
for (String k : resultMap.keySet()) {
274+
for (final String k : resultMap.keySet()) {
273275
combined += " " + resultMap.get(k);
274276
}
275-
getLogger().info("Extracted this text from the flow file with the configured json path: " + combined);
276277
return combined;
277-
} catch (Exception e) {
278+
} catch (final Exception e) {
278279
e.printStackTrace();
279280
getLogger().warn("Failed to parse json using specified json path, analyzing flow file as text");
280281
}
281282

282283
return flowFileText;
283284
}
284285

285-
private void ensureService(final ProcessContext context) {
286-
if (service != null) {
287-
return;
288-
}
289-
String jsonProps = context.getProperty(PROPS_ATTR).getValue();
290-
Properties props = jsonToProps(jsonProps);
291-
292-
String host = context.getProperty(HOST_ATTR).getValue();
293-
294-
if (host == null) {
295-
service = new StanfordCoreNLPService(props);
296-
return;
297-
}
298-
286+
private int getPort(final ProcessContext context) {
299287
int port;
300288
try {
301289
port = context.getProperty(PORT_ATTR).asInteger();
302-
} catch (NumberFormatException e) {
290+
} catch (final NumberFormatException e) {
303291
e.printStackTrace();
304292
getLogger().error("Failed to read port as integer, using default 9000");
305293
port = 9000;
306294
}
295+
return port;
296+
}
297+
298+
private AnnotationPipeline createPipeline(final ProcessContext context) {
299+
final String jsonProps = context.getProperty(PROPS_ATTR).getValue();
300+
final Properties props = jsonToProps(jsonProps);
301+
final String host = context.getProperty(HOST_ATTR).getValue();
302+
303+
if (host == null) {
304+
return StanfordCoreNLPService.createPipeline(props);
305+
}
306+
307+
final int port = getPort(context);
308+
final String key = context.getProperty(KEY_ATTR).getValue();
309+
final String secret = context.getProperty(SECRET_ATTR).getValue();
307310

308-
String key = context.getProperty(KEY_ATTR).getValue();
309-
String secret = context.getProperty(SECRET_ATTR).getValue();
311+
return StanfordCoreNLPService.createPipeline(props, host, port, key, secret);
312+
}
313+
314+
private void ensureService(final ProcessContext context) {
315+
if (service != null) {
316+
return;
317+
}
310318

311-
service = new StanfordCoreNLPService(props, host, port, key, secret);
319+
service = new StanfordCoreNLPService(createPipeline(context));
320+
return;
312321
}
313322

314-
private Properties jsonToProps(String jsonProps) {
315-
Properties props = new Properties();
323+
private Properties jsonToProps(final String jsonProps) {
324+
final Properties props = new Properties();
316325
if (jsonProps == null) {
317326
return props;
318327
}
319-
Gson gson = new Gson();
328+
final Gson gson = new Gson();
320329
try {
321-
Map<String, Object> jsonMap = gson.fromJson(jsonProps, Map.class);
322-
for (String k : jsonMap.keySet()) {
330+
final Map<String, Object> jsonMap = gson.fromJson(jsonProps, Map.class);
331+
for (final String k : jsonMap.keySet()) {
323332
props.setProperty(k, jsonMap.get(k).toString());
324333
}
325-
} catch (JsonSyntaxException e) {
334+
} catch (final JsonSyntaxException e) {
326335
e.printStackTrace();
327336
getLogger().error("Failed to read json string.");
328337
}

0 commit comments

Comments
 (0)