diff --git a/docs/changelog/130337.yaml b/docs/changelog/130337.yaml
new file mode 100644
index 0000000000000..2ea20ebd1944e
--- /dev/null
+++ b/docs/changelog/130337.yaml
@@ -0,0 +1,6 @@
+pr: 130337
+summary: Add `XmlProcessor` initial implementation
+area: Ingest Node
+type: enhancement
+issues:
+ - 97364
diff --git a/docs/reference/enrich-processor/index.md b/docs/reference/enrich-processor/index.md
index e220e763024e3..fb2cac99ee355 100644
--- a/docs/reference/enrich-processor/index.md
+++ b/docs/reference/enrich-processor/index.md
@@ -159,6 +159,9 @@ Refer to [Enrich your data](docs-content://manage-data/ingest/transform-enrich/d
[`split` processor](/reference/enrich-processor/split-processor.md)
: Splits a field into an array of values.
+[`xml` processor](/reference/enrich-processor/xml-processor.md)
+: Parses XML documents and converts them to JSON objects.
+
[`trim` processor](/reference/enrich-processor/trim-processor.md)
: Trims whitespace from field.
diff --git a/docs/reference/enrich-processor/toc.yml b/docs/reference/enrich-processor/toc.yml
index 7da271e6f0554..370c020f5f393 100644
--- a/docs/reference/enrich-processor/toc.yml
+++ b/docs/reference/enrich-processor/toc.yml
@@ -46,3 +46,4 @@ toc:
- file: urldecode-processor.md
- file: uri-parts-processor.md
- file: user-agent-processor.md
+ - file: xml-processor.md
diff --git a/docs/reference/enrich-processor/xml-processor.md b/docs/reference/enrich-processor/xml-processor.md
new file mode 100644
index 0000000000000..7452fea12ed65
--- /dev/null
+++ b/docs/reference/enrich-processor/xml-processor.md
@@ -0,0 +1,575 @@
+---
+navigation_title: "XML"
+mapped_pages:
+ - https://www.elastic.co/guide/en/elasticsearch/reference/current/xml-processor.html
+---
+
+# XML processor [xml-processor]
+
+Parses XML documents and converts them to JSON objects using a DOM parser. Each document is parsed only once, and that single parse serves both the structured output and any configured XPath extraction.
+
+$$$xml-options$$$
+
+| Name | Required | Default | Description |
+| --- | --- | --- | --- |
+| `field` | yes | - | The field containing the XML string to be parsed. |
+| `target_field` | no | `field` | The field that the converted structured object will be written into. Any existing content in this field will be overwritten. |
+| `store_xml` | no | `true` | If `true`, stores the parsed XML structure in the target field. If `false`, only XPath extraction results are stored. |
+| `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document. |
+| `ignore_failure` | no | `false` | Ignore failures for the processor. When `true` and XML parsing fails, the processor appends `_xmlparsefailure` to the document's `tags` field instead of raising an error. See [Handling pipeline failures](docs-content://manage-data/ingest/transform-enrich/ingest-pipelines.md#handling-pipeline-failures). |
+| `to_lower` | no | `false` | Convert XML element names and attribute names to lowercase. |
+| `ignore_empty_value` | no | `false` | If `true`, the processor will filter out null and empty values from the parsed XML structure, including empty elements, elements with null values, and elements with whitespace-only content. |
+| `remove_namespaces` | no | `false` | If `true`, removes namespace prefixes from element and attribute names. |
+| `force_content` | no | `false` | If `true`, element text content and attributes are always parsed into an object, with the text stored under the `#text` key. |
+| `force_array` | no | `false` | If `true`, forces all parsed values to be arrays. Single elements are wrapped in arrays. |
+| `parse_options` | no | - | Controls XML parsing behavior. Set to `"strict"` for strict XML validation that fails fast on invalid content. |
+| `xpath` | no | - | Map of XPath expressions to target field names. Extracts values from the XML using XPath and stores them in the specified fields. |
+| `namespaces` | no | - | Map of namespace prefixes to URIs for use with XPath expressions. Required when XPath expressions contain namespace prefixes. |
+| `description` | no | - | Description of the processor. Useful for describing the purpose of the processor or its configuration. |
+| `if` | no | - | Conditionally execute the processor. See [Conditionally run a processor](docs-content://manage-data/ingest/transform-enrich/ingest-pipelines.md#conditionally-run-processor). |
+| `on_failure` | no | - | Handle failures for the processor. See [Handling pipeline failures](docs-content://manage-data/ingest/transform-enrich/ingest-pipelines.md#handling-pipeline-failures). |
+| `tag` | no | - | Identifier for the processor. Useful for debugging and metrics. |
+
+## Configuration
+
+```js
+{
+ "xml": {
+ "field": "xml_field",
+ "target_field": "parsed_xml",
+ "ignore_empty_value": true
+ }
+}
+```
+
+## Examples
+
+### Basic XML parsing
+
+```console
+POST _ingest/pipeline/_simulate
+{
+ "pipeline": {
+ "processors": [
+ {
+ "xml": {
+ "field": "xml_content"
+ }
+ }
+ ]
+ },
+ "docs": [
+ {
+ "_source": {
+ "xml_content": "<catalog><book><author>William H. Gaddis</author><title>The Recognitions</title><review>One of the great seminal American novels.</review></book></catalog>"
+ }
+ }
+ ]
+}
+```
+
+Result:
+
+```console-result
+{
+ "docs": [
+ {
+ "doc": {
+ ...
+ "_source": {
+ "xml_content": "<catalog><book><author>William H. Gaddis</author><title>The Recognitions</title><review>One of the great seminal American novels.</review></book></catalog>",
+ "catalog": {
+ "book": {
+ "author": "William H. Gaddis",
+ "title": "The Recognitions",
+ "review": "One of the great seminal American novels."
+ }
+ }
+ }
+ }
+ }
+ ]
+}
+```
+
+### Filtering empty values
+
+When `ignore_empty_value` is set to `true`, the processor will remove empty elements from the parsed XML:
+
+```console
+POST _ingest/pipeline/_simulate
+{
+ "pipeline": {
+ "processors": [
+ {
+ "xml": {
+ "field": "xml_content",
+ "target_field": "parsed_xml",
+ "ignore_empty_value": true
+ }
+ }
+ ]
+ },
+ "docs": [
+ {
+ "_source": {
+ "xml_content": "William H. GaddisOne of the great seminal American novels. Some content"
+ }
+ }
+ ]
+}
+```
+
+Result with empty elements filtered out:
+
+```console-result
+{
+ "docs": [
+ {
+ "doc": {
+ ...
+ "_source": {
+ "xml_content": "William H. GaddisOne of the great seminal American novels. Some content",
+ "parsed_xml": {
+ "catalog": {
+ "book": {
+ "author": "William H. Gaddis",
+ "review": "One of the great seminal American novels.",
+ "nested": {
+ "valid_content": "Some content"
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ ]
+}
+```
+
+### Converting element names to lowercase
+
+```console
+POST _ingest/pipeline/_simulate
+{
+ "pipeline": {
+ "processors": [
+ {
+ "xml": {
+ "field": "xml_content",
+ "to_lower": true
+ }
+ }
+ ]
+ },
+ "docs": [
+ {
+ "_source": {
+ "xml_content": "William H. GaddisThe Recognitions"
+ }
+ }
+ ]
+}
+```
+
+Result:
+
+```console-result
+{
+ "docs": [
+ {
+ "doc": {
+ ...
+ "_source": {
+ "xml_content": "William H. GaddisThe Recognitions",
+ "catalog": {
+ "book": {
+ "author": "William H. Gaddis",
+ "title": "The Recognitions"
+ }
+ }
+ }
+ }
+ }
+ ]
+}
+```
+
+### Handling XML attributes
+
+XML attributes are included as properties in the resulting JSON object alongside element content:
+
+```console
+POST _ingest/pipeline/_simulate
+{
+ "pipeline": {
+ "processors": [
+ {
+ "xml": {
+ "field": "xml_content"
+ }
+ }
+ ]
+ },
+ "docs": [
+ {
+ "_source": {
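+ "xml_content": "<catalog version=\"1.0\"><book id=\"123\" isbn=\"978-0-684-80335-9\"><title lang=\"en\">The Recognitions</title><author nationality=\"American\">William H. Gaddis</author></book></catalog>"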
+ }
+ }
+ ]
+}
+```
+
+Result:
+
+```console-result
+{
+ "docs": [
+ {
+ "doc": {
+ ...
+ "_source": {
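+ "xml_content": "<catalog version=\"1.0\"><book id=\"123\" isbn=\"978-0-684-80335-9\"><title lang=\"en\">The Recognitions</title><author nationality=\"American\">William H. Gaddis</author></book></catalog>",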
+ "catalog": {
+ "version": "1.0",
+ "book": {
+ "id": "123",
+ "isbn": "978-0-684-80335-9",
+ "title": {
+ "lang": "en",
+ "#text": "The Recognitions"
+ },
+ "author": {
+ "nationality": "American",
+ "#text": "William H. Gaddis"
+ }
+ }
+ }
+ }
+ }
+ }
+ ]
+}
+```
+
+### XPath extraction
+
+The XML processor can extract specific values using XPath expressions and store them in designated fields:
+
+```console
+POST _ingest/pipeline/_simulate
+{
+ "pipeline": {
+ "processors": [
+ {
+ "xml": {
+ "field": "xml_content",
+ "store_xml": false,
+ "xpath": {
+ "//book/title/text()": "book_title",
+ "//book/author/text()": "book_author",
+ "//book/@id": "book_id"
+ }
+ }
+ }
+ ]
+ },
+ "docs": [
+ {
+ "_source": {
+ "xml_content": "The RecognitionsWilliam H. Gaddis1984George Orwell"
+ }
+ }
+ ]
+}
+```
+
+Result:
+
+```console-result
+{
+ "docs": [
+ {
+ "doc": {
+ ...
+ "_source": {
+ "xml_content": "The RecognitionsWilliam H. Gaddis1984George Orwell",
+ "book_title": ["The Recognitions", "1984"],
+ "book_author": ["William H. Gaddis", "George Orwell"],
+ "book_id": ["123", "456"]
+ }
+ }
+ }
+ ]
+}
+```
+
+### XPath with namespaces
+
+When working with XML that uses namespaces, you need to configure namespace mappings:
+
+```console
+POST _ingest/pipeline/_simulate
+{
+ "pipeline": {
+ "processors": [
+ {
+ "xml": {
+ "field": "xml_content",
+ "namespaces": {
+ "book": "http://example.com/book",
+ "author": "http://example.com/author"
+ },
+ "xpath": {
+ "//book:catalog/book:item/book:title/text()": "titles",
+ "//author:info/@name": "author_names"
+ }
+ }
+ }
+ ]
+ },
+ "docs": [
+ {
+ "_source": {
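+ "xml_content": "<book:catalog xmlns:book=\"http://example.com/book\" xmlns:author=\"http://example.com/author\"><book:item><book:title>The Recognitions</book:title><author:info name=\"William H. Gaddis\"/></book:item></book:catalog>"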
+ }
+ }
+ ]
+}
+```
+
+Result:
+
+```console-result
+{
+ "docs": [
+ {
+ "doc": {
+ ...
+ "_source": {
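+ "xml_content": "<book:catalog xmlns:book=\"http://example.com/book\" xmlns:author=\"http://example.com/author\"><book:item><book:title>The Recognitions</book:title><author:info name=\"William H. Gaddis\"/></book:item></book:catalog>",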
+ "titles": "The Recognitions",
+ "author_names": "William H. Gaddis",
+ "book:catalog": {
+ "book:item": {
+ "book:title": "The Recognitions",
+ "author:info": {
+ "name": "William H. Gaddis"
+ }
+ }
+ }
+ }
+ }
+ }
+ ]
+}
+```
+
+### Force array behavior
+
+When `force_array` is true, all parsed values become arrays:
+
+```console
+POST _ingest/pipeline/_simulate
+{
+ "pipeline": {
+ "processors": [
+ {
+ "xml": {
+ "field": "xml_content",
+ "force_array": true
+ }
+ }
+ ]
+ },
+ "docs": [
+ {
+ "_source": {
+ "xml_content": "<catalog><book><title>The Recognitions</title></book></catalog>"
+ }
+ }
+ ]
+}
+```
+
+Result:
+
+```console-result
+{
+ "docs": [
+ {
+ "doc": {
+ ...
+ "_source": {
+ "xml_content": "<catalog><book><title>The Recognitions</title></book></catalog>",
+ "catalog": [
+ {
+ "book": [
+ {
+ "title": ["The Recognitions"]
+ }
+ ]
+ }
+ ]
+ }
+ }
+ }
+ ]
+}
+```
+
+### Strict parsing mode
+
+Use `parse_options: "strict"` for strict XML validation:
+
+```console
+POST _ingest/pipeline/_simulate
+{
+ "pipeline": {
+ "processors": [
+ {
+ "xml": {
+ "field": "xml_content",
+ "parse_options": "strict",
+ "ignore_failure": true
+ }
+ }
+ ]
+ },
+ "docs": [
+ {
+ "_source": {
+ "xml_content": "Invalid XML with control character"
+ }
+ }
+ ]
+}
+```
+
+Result (with parsing failure):
+
+```console-result
+{
+ "docs": [
+ {
+ "doc": {
+ ...
+ "_source": {
+ "xml_content": "Invalid XML with control character",
+ "tags": ["_xmlparsefailure"]
+ }
+ }
+ }
+ ]
+}
+```
+
+### Mixed content handling
+
+When XML contains mixed content (text interspersed with elements), text fragments are combined and stored under the special `#text` key:
+
+```console
+POST _ingest/pipeline/_simulate
+{
+ "pipeline": {
+ "processors": [
+ {
+ "xml": {
+ "field": "xml_content"
+ }
+ }
+ ]
+ },
+ "docs": [
+ {
+ "_source": {
+ "xml_content": "<foo>This text is <b>bold</b> and this is <i>italic</i>!</foo>"
+ }
+ }
+ ]
+}
+```
+
+Result:
+
+```console-result
+{
+ "docs": [
+ {
+ "doc": {
+ ...
+ "_source": {
+ "xml_content": "<foo>This text is <b>bold</b> and this is <i>italic</i>!</foo>",
+ "foo": {
+ "b": "bold",
+ "i": "italic",
+ "#text": "This text is and this is !"
+ }
+ }
+ }
+ }
+ ]
+}
+```
+
+### Force content mode
+
+When `force_content` is `true`, all element text content is stored under the special `#text` key:
+
+```console
+POST _ingest/pipeline/_simulate
+{
+ "pipeline": {
+ "processors": [
+ {
+ "xml": {
+ "field": "xml_content",
+ "force_content": true
+ }
+ }
+ ]
+ },
+ "docs": [
+ {
+ "_source": {
+ "xml_content": "<book author=\"William H. Gaddis\">The Recognitions</book>"
+ }
+ }
+ ]
+}
+```
+
+Result:
+
+```console-result
+{
+ "docs": [
+ {
+ "doc": {
+ ...
+ "_source": {
+ "xml_content": "<book author=\"William H. Gaddis\">The Recognitions</book>",
+ "book": {
+ "author": "William H. Gaddis",
+ "#text": "The Recognitions"
+ }
+ }
+ }
+ }
+ ]
+}
+```
+
+## XML features
+
+The XML processor supports:
+
+- **Elements with text content**: Converted to key-value pairs where the element name is the key and text content is the value
+- **Nested elements**: Converted to nested JSON objects
+- **Empty elements**: Converted to `null` values (can be filtered with `ignore_empty_value`)
+- **Repeated elements**: Converted to arrays when multiple elements with the same name exist at the same level
+- **XML attributes**: Included as properties in the JSON object alongside element content. When an element has both attributes and text content, the text is stored under a special `#text` key
+- **Mixed content**: Elements with both text and child elements include text under a special `#text` key while attributes and child elements become object properties
+- **Namespaces**: Namespace prefixes are preserved by default and can be used in XPath expressions with the `namespaces` configuration. Use `remove_namespaces: true` to strip namespace prefixes from element names
diff --git a/modules/ingest-common/src/main/java/module-info.java b/modules/ingest-common/src/main/java/module-info.java
index c3b3ab90892d9..84e30519d2d1b 100644
--- a/modules/ingest-common/src/main/java/module-info.java
+++ b/modules/ingest-common/src/main/java/module-info.java
@@ -20,6 +20,8 @@
requires org.apache.lucene.analysis.common;
requires org.jruby.joni;
+ requires java.xml;
+
exports org.elasticsearch.ingest.common; // for painless
opens org.elasticsearch.ingest.common to org.elasticsearch.painless.spi; // whitelist resource access
diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java
index 6e517d644cadb..0dc06e74af3bc 100644
--- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java
+++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java
@@ -74,7 +74,8 @@ public Map getProcessors(Processor.Parameters paramet
entry(TrimProcessor.TYPE, new TrimProcessor.Factory()),
entry(URLDecodeProcessor.TYPE, new URLDecodeProcessor.Factory()),
entry(UppercaseProcessor.TYPE, new UppercaseProcessor.Factory()),
- entry(UriPartsProcessor.TYPE, new UriPartsProcessor.Factory())
+ entry(UriPartsProcessor.TYPE, new UriPartsProcessor.Factory()),
+ entry(XmlProcessor.TYPE, new XmlProcessor.Factory())
);
}
diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/XmlProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/XmlProcessor.java
new file mode 100644
index 0000000000000..3c2fac023fa20
--- /dev/null
+++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/XmlProcessor.java
@@ -0,0 +1,967 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.ingest.common;
+
+import org.elasticsearch.cluster.metadata.ProjectId;
+import org.elasticsearch.ingest.AbstractProcessor;
+import org.elasticsearch.ingest.ConfigurationUtils;
+import org.elasticsearch.ingest.IngestDocument;
+import org.elasticsearch.ingest.Processor;
+import org.w3c.dom.Document;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+import javax.xml.namespace.NamespaceContext;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.xpath.XPath;
+import javax.xml.xpath.XPathConstants;
+import javax.xml.xpath.XPathExpression;
+import javax.xml.xpath.XPathExpressionException;
+import javax.xml.xpath.XPathFactory;
+
+/**
+ * Processor that parses XML documents and converts them to JSON objects using a single-pass streaming approach.
+ *
+ * Features:
+ * - XML to JSON conversion with configurable structure options
+ * - XPath extraction with namespace support
+ * - Configurable options: force_array, force_content, remove_namespaces, to_lower
+ * - Strict parsing mode for XML validation
+ * - Empty value filtering with ignore_empty_value option
+ * - Logstash-compatible error handling and behavior
+ */
+public final class XmlProcessor extends AbstractProcessor {
+
+ /** Processor type name; returned by {@code getType()} and used as the registration key in {@code IngestCommonPlugin}. */
+ public static final String TYPE = "xml";
+
+ // Shared XPath factory, created once per class load.
+ // NOTE(review): presumably consumed by compileXPathExpressions, which is not visible in this chunk - confirm.
+ private static final XPathFactory XPATH_FACTORY = XPathFactory.newInstance();
+
+ // Pre-compiled pattern to detect namespace prefixes: matches any text that contains a
+ // "prefix:localName" pair where both parts start with an ASCII letter.
+ private static final Pattern NAMESPACE_PATTERN = Pattern.compile(".*\\b[a-zA-Z][a-zA-Z0-9_-]*:[a-zA-Z][a-zA-Z0-9_-]*.*");
+
+ // Pre-configured SAX parser factories for secure XML parsing.
+ // One factory per combination of namespace awareness and strictness, each built by its own
+ // create* helper (the helpers are defined outside this chunk).
+ private static final javax.xml.parsers.SAXParserFactory SAX_PARSER_FACTORY = createSecureSaxParserFactory();
+ private static final javax.xml.parsers.SAXParserFactory SAX_PARSER_FACTORY_NS = createSecureSaxParserFactoryNamespaceAware();
+ private static final javax.xml.parsers.SAXParserFactory SAX_PARSER_FACTORY_STRICT = createSecureSaxParserFactoryStrict();
+ private static final javax.xml.parsers.SAXParserFactory SAX_PARSER_FACTORY_NS_STRICT =
+ createSecureSaxParserFactoryNamespaceAwareStrict();
+
+ // Pre-configured document builder factory for DOM creation
+ private static final DocumentBuilderFactory DOM_FACTORY = createSecureDocumentBuilderFactory();
+
+ // Per-processor configuration. Every field is final and assigned exactly once in the constructor.
+ // Name of the document field read by execute().
+ private final String field;
+ private final String targetField;
+ private final boolean ignoreMissing;
+ // When true, execute() returns the document instead of throwing on null, non-string or unparsable input.
+ private final boolean ignoreFailure;
+ private final boolean toLower;
+ private final boolean ignoreEmptyValue;
+ private final boolean storeXml;
+ private final boolean removeNamespaces;
+ private final boolean forceContent;
+ private final boolean forceArray;
+ // Immutable copy of the configured XPath map; empty (never null) when none was configured.
+ private final Map xpathExpressions;
+ // Immutable copy of the configured namespace map; empty (never null) when none was configured.
+ private final Map namespaces;
+ // Result of compileXPathExpressions(xpathExpressions, namespaces), computed once in the constructor.
+ private final Map compiledXPathExpressions;
+ // Never null: a null constructor argument is stored as the empty string.
+ private final String parseOptions;
+
+ /**
+ * Creates an XML processor from already-resolved configuration values.
+ *
+ * @param tag processor tag, forwarded to the superclass
+ * @param description processor description, forwarded to the superclass
+ * @param field name of the document field that holds the XML string
+ * @param targetField name of the field configured to receive the parsed structure
+ * @param ignoreMissing passed to the field lookup in {@code execute}; also lets a null value pass silently
+ * @param ignoreFailure when {@code true}, {@code execute} returns the document instead of throwing
+ * @param toLower value of the {@code to_lower} option
+ * @param ignoreEmptyValue value of the {@code ignore_empty_value} option
+ * @param storeXml value of the {@code store_xml} option
+ * @param removeNamespaces value of the {@code remove_namespaces} option
+ * @param forceContent value of the {@code force_content} option
+ * @param forceArray value of the {@code force_array} option
+ * @param xpathExpressions XPath configuration map; may be {@code null}, which is treated as empty
+ * @param namespaces namespace configuration map; may be {@code null}, which is treated as empty
+ * @param parseOptions value of the {@code parse_options} option; may be {@code null}, which is stored as {@code ""}
+ */
+ XmlProcessor(
+ String tag,
+ String description,
+ String field,
+ String targetField,
+ boolean ignoreMissing,
+ boolean ignoreFailure,
+ boolean toLower,
+ boolean ignoreEmptyValue,
+ boolean storeXml,
+ boolean removeNamespaces,
+ boolean forceContent,
+ boolean forceArray,
+ Map xpathExpressions,
+ Map namespaces,
+ String parseOptions
+ ) {
+ super(tag, description);
+ this.field = field;
+ this.targetField = targetField;
+ this.ignoreMissing = ignoreMissing;
+ this.ignoreFailure = ignoreFailure;
+ this.toLower = toLower;
+ this.ignoreEmptyValue = ignoreEmptyValue;
+ this.storeXml = storeXml;
+ this.removeNamespaces = removeNamespaces;
+ this.forceContent = forceContent;
+ this.forceArray = forceArray;
+ // Defensive immutable copies; a null argument is normalised to an empty map.
+ this.xpathExpressions = xpathExpressions != null ? Map.copyOf(xpathExpressions) : Map.of();
+ this.namespaces = namespaces != null ? Map.copyOf(namespaces) : Map.of();
+ // Reads the two copies assigned just above, so this line must stay after them.
+ this.compiledXPathExpressions = compileXPathExpressions(this.xpathExpressions, this.namespaces);
+ // Normalise null to "" so later comparisons such as isStrict() never see null.
+ this.parseOptions = parseOptions != null ? parseOptions : "";
+ }
+
+ /** Returns the name of the document field that holds the XML string. */
+ public String getField() {
+ return field;
+ }
+
+ /** Returns the configured target field name. */
+ public String getTargetField() {
+ return targetField;
+ }
+
+ /** Returns the value of the {@code ignore_missing} option. */
+ public boolean isIgnoreMissing() {
+ return ignoreMissing;
+ }
+
+ /** Returns the value of the {@code ignore_empty_value} option. */
+ public boolean isIgnoreEmptyValue() {
+ return ignoreEmptyValue;
+ }
+
+ /** Returns the value of the {@code store_xml} option. */
+ public boolean isStoreXml() {
+ return storeXml;
+ }
+
+ /** Returns the value of the {@code remove_namespaces} option. */
+ public boolean isRemoveNamespaces() {
+ return removeNamespaces;
+ }
+
+ /** Returns the value of the {@code force_content} option. */
+ public boolean isForceContent() {
+ return forceContent;
+ }
+
+ /** Returns {@code true} only when the parse options string is exactly {@code "strict"}. */
+ public boolean isStrict() {
+ return "strict".equals(parseOptions);
+ }
+
+ /** Returns the value of the {@code force_array} option. */
+ public boolean isForceArray() {
+ return forceArray;
+ }
+
+ /** Returns {@code true} when at least one namespace mapping is configured. */
+ public boolean hasNamespaces() {
+ return namespaces.isEmpty() == false;
+ }
+
+ /** Returns the namespace map: the immutable copy made in the constructor, never {@code null}. */
+ public Map getNamespaces() {
+ return namespaces;
+ }
+
+ /** Returns the parse options string; never {@code null}, empty when the option was not configured. */
+ public String getParseOptions() {
+ return parseOptions;
+ }
+
+    /**
+     * Reads the configured field, parses it as XML and applies the result to the document.
+     *
+     * <p>A null value is skipped silently when {@code ignoreMissing} or {@code ignoreFailure} is set, and a
+     * non-string value is skipped silently when {@code ignoreFailure} is set. Parsing only runs when there is
+     * something to produce (the XML is stored, or at least one XPath expression is configured). If parsing
+     * throws and {@code ignoreFailure} is set, {@code _xmlparsefailure} is appended to the {@code tags} field.
+     *
+     * @param document the ingest document to process
+     * @return the same document instance
+     * @throws IllegalArgumentException if the value is null, is not a string, or is not valid XML, and the
+     *         corresponding ignore flag is not set
+     */
+    @Override
+    public IngestDocument execute(IngestDocument document) {
+        final Object rawValue = document.getFieldValue(field, Object.class, ignoreMissing);
+
+        if (rawValue == null) {
+            if (ignoreMissing == false && ignoreFailure == false) {
+                throw new IllegalArgumentException("field [" + field + "] is null, cannot parse XML");
+            }
+            return document;
+        }
+
+        if ((rawValue instanceof String) == false) {
+            if (ignoreFailure == false) {
+                throw new IllegalArgumentException("field [" + field + "] is not a string, cannot parse XML");
+            }
+            return document;
+        }
+
+        // Nothing to store and nothing to extract: leave the document untouched.
+        final boolean hasWork = storeXml || xpathExpressions.isEmpty() == false;
+        if (hasWork == false) {
+            return document;
+        }
+
+        try {
+            // Always use streaming parser for optimal performance and memory usage
+            parseXmlAndXPath(document, ((String) rawValue).trim());
+        } catch (Exception e) {
+            if (ignoreFailure == false) {
+                throw new IllegalArgumentException("field [" + field + "] contains invalid XML: " + e.getMessage(), e);
+            }
+            // Add failure tag similar to Logstash behavior
+            document.appendFieldValue("tags", "_xmlparsefailure");
+        }
+
+        return document;
+    }
+
+ /** Returns the processor type name, {@link #TYPE}. */
+ @Override
+ public String getType() {
+ return TYPE;
+ }
+
+    /**
+     * Determines whether a value counts as empty for filtering purposes.
+     * Used by the {@code ignore_empty_value} feature to filter out empty content.
+     *
+     * <p>The following are considered empty:
+     * <ul>
+     *   <li>{@code null}</li>
+     *   <li>strings that are empty or contain only whitespace (as defined by {@link String#trim()})</li>
+     *   <li>empty {@link Map}s</li>
+     *   <li>empty {@link List}s</li>
+     * </ul>
+     * Any other value, including other collection types, is treated as non-empty.
+     *
+     * @param value the value to check; may be {@code null}
+     * @return {@code true} if the value should be considered empty
+     */
+    private boolean isEmptyValue(Object value) {
+        if (value == null) {
+            return true;
+        }
+        if (value instanceof String) {
+            return ((String) value).trim().isEmpty();
+        }
+        if (value instanceof Map) {
+            // Unbounded wildcards: only emptiness is inspected, so the key and value types are irrelevant.
+            return ((Map<?, ?>) value).isEmpty();
+        }
+        if (value instanceof List) {
+            return ((List<?>) value).isEmpty();
+        }
+        return false;
+    }
+
+ /**
+ * Extract the text value from a DOM node for XPath result processing.
+ * Handles different node types appropriately:
+ * - TEXT_NODE and CDATA_SECTION_NODE: returns node value directly
+ * - ATTRIBUTE_NODE: returns attribute value
+ * - ELEMENT_NODE: returns text content (concatenated text of all descendants)
+ * - Other node types: returns text content as fallback
+ *
+ * @param node the DOM node to extract text from
+ * @return the text content of the node, or null if node is null
+ */
+ private String getNodeValue(Node node) {
+ if (node == null) {
+ return null;
+ }
+
+ switch (node.getNodeType()) {
+ case Node.ATTRIBUTE_NODE:
+ case Node.CDATA_SECTION_NODE:
+ case Node.TEXT_NODE:
+ return node.getNodeValue();
+ case Node.ELEMENT_NODE:
+ default:
+ return node.getTextContent();
+ }
+ }
+
+ /**
+ * Applies force_array logic to ensure all fields are arrays when enabled.
+ *
+ * Behavior:
+ * - If force_array is false: returns content unchanged
+ * - If force_array is true and content is already a List: returns content unchanged
+ * - If force_array is true and content is not a List: wraps content in a new ArrayList
+ * - Handles null content appropriately (wraps null in array if force_array is true)
+ *
+ * @param elementName the name of the element (for context, not used in current implementation)
+ * @param content the content to potentially wrap in an array
+ * @return the content, optionally wrapped in an array based on force_array setting
+ */
+ private Object applyForceArray(String elementName, Object content) {
+ if (forceArray && !(content instanceof List)) {
+ List