@@ -39,7 +39,7 @@ import scala.util.control.NonFatal
3939 * do NER processing for persons in EN
4040 * and write the results to either:
4141 * - Elasticsearch version 7.x server
42- * - Opensearch version 1 .x server
42+ * - Opensearch version 2.x server
4343 *
4444 * Remarks:
4545 * - We still need spray.json because of the elasticsearch pekko connectors
@@ -63,7 +63,7 @@ object SSEtoElasticsearch extends App {
6363 }
6464
6565 // 2.x model from https://opennlp.apache.org/models.html
66- private val tokenModel = new TokenizerModel (new FileInputStream (Paths .get(" src/main/resources/opennlp-en-ud-ewt-tokens-1.0-1.9.3 .bin" ).toFile))
66+ private val tokenModel = new TokenizerModel (new FileInputStream (Paths .get(" src/main/resources/opennlp-en-ud-ewt-tokens-1.2-2.5.0 .bin" ).toFile))
6767 // 1.5 model from https://opennlp.sourceforge.net/models-1.5
6868 private val personModel = new TokenNameFinderModel (new FileInputStream (Paths .get(" src/main/resources/en-ner-person.bin" ).toFile))
6969
@@ -93,7 +93,7 @@ object SSEtoElasticsearch extends App {
9393 // elasticsearchContainer.start()
9494 private val dockerImageNameOS = DockerImageName
9595 .parse(" opensearchproject/opensearch" )
96- .withTag(" 1.3.19 " )
96+ .withTag(" 2.18.0 " )
9797 private val searchContainer = new OpensearchContainer (dockerImageNameOS)
9898 searchContainer.start()
9999
@@ -110,7 +110,7 @@ object SSEtoElasticsearch extends App {
110110
111111 private val sourceSettings = ElasticsearchSourceSettings (connectionSettings).withApiVersion(ApiVersion .V7 )
112112
113- // Note that ElasticsearchSource reads are scroll requests, where you are able to fetch even the entire collection of documents
113+ // ElasticsearchSource reads are "scroll requests", which allow fetching the entire collection of documents
114114 private val elasticsearchSourceTyped = ElasticsearchSource
115115 .typed[Ctx ](
116116 searchParams,
@@ -243,7 +243,7 @@ object SSEtoElasticsearch extends App {
243243 // .mapAsync(3)(ctx => findPersonsRemoteGpt3NER(ctx))
244244 .filter(ctx => ctx.personsFound.nonEmpty)
245245
246- logger.info(s " Elasticsearch container listening on: ${searchContainer.getHttpHostAddress}" )
246+ logger.info(s " Elasticsearch/Opensearch container listening on: ${searchContainer.getHttpHostAddress}" )
247247 logger.info(" About to start processing flow..." )
248248
249249 restartSource
0 commit comments