@@ -39,7 +39,7 @@ import scala.util.control.NonFatal
39
39
* do NER processing for persons in EN
40
40
* and write the results to either:
41
41
* - Elasticsearch version 7.x server
42
- * - Opensearch version 1.x server
42
+ * - Opensearch version 2.x server
43
43
*
44
44
* Remarks:
45
45
* - We still need spray.json because of the elasticsearch pekko connectors
@@ -63,7 +63,7 @@ object SSEtoElasticsearch extends App {
63
63
}
64
64
65
65
// 2.x model from https://opennlp.apache.org/models.html
66
- private val tokenModel = new TokenizerModel (new FileInputStream (Paths .get(" src/main/resources/opennlp-en-ud-ewt-tokens-1.0-1.9.3 .bin" ).toFile))
66
+ private val tokenModel = new TokenizerModel (new FileInputStream (Paths .get(" src/main/resources/opennlp-en-ud-ewt-tokens-1.2-2.5.0 .bin" ).toFile))
67
67
// 1.5 model from https://opennlp.sourceforge.net/models-1.5
68
68
private val personModel = new TokenNameFinderModel (new FileInputStream (Paths .get(" src/main/resources/en-ner-person.bin" ).toFile))
69
69
@@ -93,7 +93,7 @@ object SSEtoElasticsearch extends App {
93
93
// elasticsearchContainer.start()
94
94
private val dockerImageNameOS = DockerImageName
95
95
.parse(" opensearchproject/opensearch" )
96
- .withTag(" 1.3.19 " )
96
+ .withTag(" 2.18.0 " )
97
97
private val searchContainer = new OpensearchContainer (dockerImageNameOS)
98
98
searchContainer.start()
99
99
@@ -110,7 +110,7 @@ object SSEtoElasticsearch extends App {
110
110
111
111
private val sourceSettings = ElasticsearchSourceSettings (connectionSettings).withApiVersion(ApiVersion .V7 )
112
112
113
- // Note that ElasticsearchSource reads are scroll requests, where you are able to fetch even the entire collection of documents
113
+ // ElasticsearchSource reads are "scroll requests", which allow fetching the entire collection of documents
114
114
private val elasticsearchSourceTyped = ElasticsearchSource
115
115
.typed[Ctx ](
116
116
searchParams,
@@ -243,7 +243,7 @@ object SSEtoElasticsearch extends App {
243
243
// .mapAsync(3)(ctx => findPersonsRemoteGpt3NER(ctx))
244
244
.filter(ctx => ctx.personsFound.nonEmpty)
245
245
246
- logger.info(s " Elasticsearch container listening on: ${searchContainer.getHttpHostAddress}" )
246
+ logger.info(s " Elasticsearch/Opensearch container listening on: ${searchContainer.getHttpHostAddress}" )
247
247
logger.info(" About to start processing flow..." )
248
248
249
249
restartSource
0 commit comments