Skip to content

Commit d008f47

Browse files
author
markheger
authored
Merge pull request #52 from IBMStreams/develop
1.4.0
2 parents c95c67d + 765d18b commit d008f47

File tree

412 files changed

+5180
-728
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

412 files changed

+5180
-728
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
# ignore directories generated by Streams compiler and runtime
44
**/.apt_generated/
5-
**/.settings/
5+
#**/.settings/
66
**/output/
77

88
**/src-gen/

build.xml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@
5050
<arg value="Makefile" />
5151
<arg value="clean" />
5252
</exec>
53+
<ant dir="test" target="clean">
54+
</ant>
5355
</target>
5456

5557

com.ibm.streamsx.nlp/.externalToolBuilders/org.eclipse.jdt.core.javabuilder.launch

Lines changed: 0 additions & 6 deletions
This file was deleted.

com.ibm.streamsx.nlp/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@
44
/output/
55
/data/
66
/.pydevproject
7+
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
eclipse.preferences.version=1
2+
line.separator=\n
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
3+
<properties>
4+
<comment>SPL Build Options for Project</comment>
5+
<entry key="com.ibm.streams.studio.splproject:EXT_CLEAN_BUILD_CMD">make clean</entry>
6+
<entry key="com.ibm.streams.studio.splproject:REPLACE_ENVIRONMENT">F</entry>
7+
<entry key="com.ibm.streams.studio.splproject:EXT_FULL_BUILD_CMD">make all</entry>
8+
<entry key="com.ibm.streams.studio.splproject:OUTPUT_DIR">output</entry>
9+
<entry key="com.ibm.streams.studio.splproject:BUILDER_TYPE">1</entry>
10+
<entry key="com.ibm.streams.studio.splproject:DERIVED_OUTPUT_DIR">T</entry>
11+
<entry key="com.ibm.streams.studio.splproject:EXT_INCR_BUILD_CMD">make</entry>
12+
<entry key="com.ibm.streams.studio.splproject:DATA_DIR">data</entry>
13+
<entry key="com.ibm.streams.studio.splproject:EXT_BUILD_OUTPUT">T</entry>
14+
</properties>

com.ibm.streamsx.nlp/bin/createTypes.pl

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,10 @@ ($;$)
3232
3333
Options:
3434
--pearFile <filename> Specifies the PEAR file to be loaded.
35-
--outputfile <filename> Filename as the output file name. .spl will be appended.
36-
--help Print this message.
35+
--outputfile <filename> Filename as the output file name.
36+
--namespace <namespace> SPL namespace of the generated SPL files (optional)
37+
--main <composite> Name of the main composite to be generated (optional). File <composite>.spl will be created.
38+
--help Print this message.
3739
END
3840
if ($retCode == 0) {
3941
print STDOUT $usageString;
@@ -144,9 +146,20 @@ END
144146
return $prolog;
145147
}
146148

149+
sub addNamespace($) {
150+
151+
my ($namespace) = @_;
152+
my $prolog=<<END;
153+
namespace $namespace;
154+
155+
END
156+
return $prolog;
157+
}
158+
147159
sub main() {
148160
my $pearFile;
149161
my $outfilename;
162+
my $namespace;
150163
my $needHelp;
151164
my $outfilename="TypesGenerated.spl";
152165
my $createMain = 0;
@@ -155,6 +168,7 @@ ()
155168

156169
GetOptions ("pearFile=s" => \$pearFile,
157170
"outputfile=s", \$outfilename,
171+
"namespace=s", \$namespace,
158172
"main=s",\$mainParam,
159173
"help|h|?",\$needHelp,
160174
) or usage(1);
@@ -192,9 +206,29 @@ ()
192206
if ($createMain) {
193207
my $annoType=getFirstAnnoType($outfilename);
194208
open (MAINFILE,">".$compositeName.".spl") or die "Could not create file ".$compositeName.".spl";
209+
if (defined $namespace) {
210+
print MAINFILE addNamespace($namespace);
211+
}
195212
print MAINFILE generateComposite($compositeName,$pearFile,$annoType);
196213
close MAINFILE;
197214
}
215+
216+
if (defined $namespace) {
217+
executeAndCapture("mkdir -p $namespace");
218+
if ($createMain) {
219+
my $mainFile=$compositeName.".spl";
220+
executeAndCapture("mv $mainFile $namespace");
221+
}
222+
open (DATA, "<".$outfilename) || die "could not open $outfilename\n";
223+
my @body=<DATA>;
224+
close(DATA);
225+
# add namespace to top of the file
226+
open(TYPESFILE,">".$namespace."/".$outfilename);
227+
print TYPESFILE addNamespace($namespace);
228+
print TYPESFILE @body;
229+
close(TYPESFILE);
230+
executeAndCapture("rm -f $outfilename");
231+
}
198232
}
199233

200234
main();

com.ibm.streamsx.nlp/com.ibm.streamsx.nlp/ContentRanking.spl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ use com.ibm.streamsx.nlp.utils::*;
1010
/**
1111
* This operator uses a previously trained model in order to find out the most likely intent of a text.
1212
*
13+
* The ContentRanking operator should be used in a Streams release where SPL Python primitive support is not provided (e.g. Streams 3.2). It uses a **ShellPipe** operator to invoke Python scripts.
14+
* When using a Streams release 4.2 or later, it is recommended to create a *SPL Python primitive operator* to invoke Python classes or functions.
15+
*
1316
* @input InStream
1417
* One tuple is one document.
1518
*
@@ -42,6 +45,9 @@ use com.ibm.streamsx.nlp.utils::*;
4245
*
4346
* @param outStreamType
4447
* The OutStream (output port 0) schema of this operator. The schema must contain the schema defined by the resultType.
48+
*
49+
* @param initOnFirstTuple
50+
* By default the script is called on operator startup. If this parameter is set to true, the script is instead called when the first tuple arrives.
4551
*/
4652
public composite ContentRanking (
4753
input InStream;
@@ -56,6 +62,7 @@ public composite ContentRanking (
5662
expression<rstring> $kbNamesFile;
5763
attribute $documentAttribute;
5864
type $outStreamType;
65+
expression<boolean> $initOnFirstTuple : false;
5966

6067
/**
6168
* OutStream must contain the resultType schema
@@ -77,6 +84,7 @@ public composite ContentRanking (
7784
command: $pythonCommand + " -u " + $pythonScript + " " + $modelFile + " " + $kbIndexFile + " " + $dictLemmasFile;
7885
stdinAttribute: $documentAttribute;
7986
stdoutAttribute: "crPythonScriptOutput";
87+
initOnFirstTuple: $initOnFirstTuple;
8088
}
8189

8290
(

com.ibm.streamsx.nlp/com.ibm.streamsx.nlp/ContentRankingModelBuilder.spl

Lines changed: 112 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ use com.ibm.streamsx.nlp.utils::*;
1010
/**
1111
* This operator trains a model for ContentRanking based on training documents.
1212
*
13+
* The ContentRankingModelBuilder operator should be used in a Streams release where SPL Python primitive support is not provided (e.g. Streams 3.2). It uses a **ShellSource** operator to invoke Python scripts.
14+
* When using a Streams release 4.2 or later, it is recommended to create a *SPL Python primitive operator* to invoke Python classes or functions.
15+
*
1316
* @param pythonCommand
1417
* The name of the python binary. The default name is `python`.
1518
* With this parameter you can change the version and the location of the python command according to your environment.
@@ -32,6 +35,59 @@ use com.ibm.streamsx.nlp.utils::*;
3235
*/
3336
public composite ContentRankingModelBuilder
3437
(
38+
) {
39+
param
40+
expression<rstring> $pythonCommand: "python"; // Content ranking scripts need python 2.7 or above!
41+
expression<rstring> $pythonScript: getToolkitPath()+"/etc/python/ContentRankingModelBuilder.py";
42+
expression<rstring> $xRegFile;
43+
expression<rstring> $yRegFile;
44+
expression<rstring> $modelFilesDirectory;
45+
expression<rstring> $nEpoch: "5";
46+
47+
graph
48+
49+
(stream<rstring result> ResultStream) as CRMB = ContentRankingModelBuilderCore () {
50+
param
51+
nEpoch: $nEpoch;
52+
xRegFile: $xRegFile;
53+
yRegFile: $yRegFile;
54+
modelFilesDirectory: $modelFilesDirectory;
55+
pythonCommand: $pythonCommand;
56+
pythonScript: $pythonScript;
57+
}
58+
}
59+
60+
/**
61+
* This operator trains a model for ContentRanking based on training documents.
62+
*
63+
* The ContentRankingModelBuilder operator should be used in a Streams release where SPL Python primitive support is not provided (e.g. Streams 3.2). It uses a **ShellSource** operator to invoke Python scripts.
64+
* When using a Streams release 4.2 or later, it is recommended to create a *SPL Python primitive operator* to invoke Python classes or functions.
65+
*
66+
* @output ResultStream
67+
* Result stream; a window marker is submitted on it when building the model has finished.
68+
*
69+
* @param pythonCommand
70+
* The name of the python binary. The default name is `python`.
71+
* With this parameter you can change the version and the location of the python command according to your environment.
72+
* Content ranking scripts need python 2.7 or later!
73+
*
74+
* @param pythonScript
75+
* The name of the python script. The default is `<toolkit_dir>/etc/python/ContentRankingModelBuilder.py`.
76+
*
77+
* @param xRegFile
78+
* The name of the input file containing the X-register training data.
79+
*
80+
* @param yRegFile
81+
* The name of the input file containing the Y-register training data.
82+
*
83+
* @param modelFilesDirectory
84+
* The name of the output directory where the model pkl files are created.
85+
*
86+
* @param nEpoch
87+
* The number of epochs used for neural network training.
88+
*/
89+
composite ContentRankingModelBuilderCore
90+
( output ResultStream
3591
) {
3692
param
3793
expression<rstring> $pythonCommand: "python"; // Content ranking scripts need python 2.7 or above!
@@ -70,12 +126,67 @@ public composite ContentRankingModelBuilder
70126
}
71127
}
72128

73-
() as WriterStat = Custom(StatStream as I) {
129+
(stream<rstring result> ResultStream) as WriterStat = Custom(StatStream as I) {
74130
logic onTuple I: {
75131
if (exitCode != 0)
76132
appTrc(Trace.error, "Model Builder exit code: " + (rstring)exitCode + " reason: " + exitReason);
77133
printStringLn("Model Builder exit code: " + (rstring)exitCode + " reason: " + exitReason);
134+
submit(Sys.WindowMarker, ResultStream);
78135
}
79136
}
80137

81138
}
139+
140+
/**
141+
* This operator trains a model for ContentRanking based on training documents.
142+
*
143+
* The ContentRankingModelBuilder operator should be used in a Streams release where SPL Python primitive support is not provided (e.g. Streams 3.2). It uses a **ShellSource** operator to invoke Python scripts.
144+
* When using a Streams release 4.2 or later, it is recommended to create a *SPL Python primitive operator* to invoke Python classes or functions.
145+
*
146+
* @output ResultStream
147+
* Result stream; a window marker is submitted on it when building the model has finished.
148+
*
149+
* @param pythonCommand
150+
* The name of the python binary. The default name is `python`.
151+
* With this parameter you can change the version and the location of the python command according to your environment.
152+
* Content ranking scripts need python 2.7 or later!
153+
*
154+
* @param pythonScript
155+
* The name of the python script. The default is `<toolkit_dir>/etc/python/ContentRankingModelBuilder.py`.
156+
*
157+
* @param xRegFile
158+
* The name of the input file containing the X-register training data.
159+
*
160+
* @param yRegFile
161+
* The name of the input file containing the Y-register training data.
162+
*
163+
* @param modelFilesDirectory
164+
* The name of the output directory where the model pkl files are created.
165+
*
166+
* @param nEpoch
167+
* The number of epochs used for neural network training.
168+
*/
169+
public composite ContentRankingModelBuilder2
170+
( output ResultStream
171+
) {
172+
param
173+
expression<rstring> $pythonCommand: "python"; // Content ranking scripts need python 2.7 or above!
174+
expression<rstring> $pythonScript: getToolkitPath()+"/etc/python/ContentRankingModelBuilder.py";
175+
expression<rstring> $xRegFile;
176+
expression<rstring> $yRegFile;
177+
expression<rstring> $modelFilesDirectory;
178+
expression<rstring> $nEpoch: "5";
179+
180+
graph
181+
182+
(stream<rstring result> ResultStream) as CRMB = ContentRankingModelBuilderCore () {
183+
param
184+
nEpoch: $nEpoch;
185+
xRegFile: $xRegFile;
186+
yRegFile: $yRegFile;
187+
modelFilesDirectory: $modelFilesDirectory;
188+
pythonCommand: $pythonCommand;
189+
pythonScript: $pythonScript;
190+
}
191+
192+
}

com.ibm.streamsx.nlp/com.ibm.streamsx.nlp/DictionaryFilter/DictionaryFilter.xml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
param
2424
filterMode : DictionaryFilter.remove;
2525
textAttribute : text;
26-
dictionaryFile : "stopwords.csv";
26+
dictionaryFile : "etc/stopwords.csv";
2727
output
2828
${outputStream} : ${outputAttribute} = ${value};
2929
}</template>
@@ -51,7 +51,7 @@ In case the filterMode `keep` is selected, then those words of the textAttribute
5151
<name>textAttribute</name>
5252
<description>
5353
<![CDATA[
54-
Specifies the dictionary file loaded on startup.
54+
Specifies the text attribute of the input stream.
5555
]]></description>
5656
<optional>false</optional>
5757
<rewriteAllowed>false</rewriteAllowed>
@@ -63,7 +63,7 @@ Specifies the dictionary file loaded on startup.
6363
<name>dictionaryFile</name>
6464
<description>
6565
<![CDATA[
66-
Specifies the dictionary file loaded on startup.
66+
Specifies the dictionary file loaded on startup. If a relative path is used, it is resolved relative to the application directory. It is recommended to store the file in the etc directory.
6767
]]></description>
6868
<optional>false</optional>
6969
<rewriteAllowed>false</rewriteAllowed>

0 commit comments

Comments
 (0)