test

felixfeng33 · felixfeng33 · commit 8b6a737836f4 · 2026-01-20T15:20:25.000+08:00
diff --git a/packages/docx-io/src/lib/__tests__/block_quotes.spec.tsx b/packages/docx-io/src/lib/__tests__/block_quotes.spec.tsx
@@ -0,0 +1,34 @@
+/** @jsx jsx */
+
+import { jsx } from '@platejs/test-utils';
+
+import { getDocxTestName, testDocxImporter } from './testDocxImporter';
+
+jsx;
+
+const name = 'block_quotes';
+
+// Import test for the `block_quotes` fixture.
+// mammoth output: blockquote style NOT preserved (becomes p), italic lost
+// NOTE: mammoth doesn't recognize Word's blockquote style, so both the styled
+// quote and the manually indented one are expected as plain <hp> paragraphs.
+// The apostrophe is Word's smart quote (U+2019), written as an escape below.
+describe(getDocxTestName(name), () => {
+  testDocxImporter({
+    expected: (
+      <editor>
+        <hh2>Some block quotes, in different ways</hh2>
+        <hp>This is the proper way, with a style</hp>
+        <hp>
+          I don{'\u2019'}t know why this would be in italics, but so it appears
+          to be on my screen.
+        </hp>
+        <hp>And this is the way that most people do it:</hp>
+        <hp>
+          I just indented this, so it looks like a block quote. I think this is
+          how most people do block quotes in their documents.
+        </hp>
+        <hp>And back to the normal style.</hp>
+      </editor>
+    ),
+    filename: name,
+  });
+});
diff --git a/packages/docx-io/src/lib/__tests__/headers.spec.tsx b/packages/docx-io/src/lib/__tests__/headers.spec.tsx
@@ -0,0 +1,36 @@
+/** @jsx jsx */
+
+import { jsx } from '@platejs/test-utils';
+
+import { getDocxTestName, testDocxImporter } from './testDocxImporter';
+
+jsx;
+
+const name = 'headers';
+
+// Import test for the `headers` fixture.
+// mammoth output: h1-h6 preserved, no font styles
+// "Seventh level" is expected as a plain paragraph; the fixture text itself
+// notes that no Heading 7 style exists in styles.xml.
+describe(getDocxTestName(name), () => {
+  testDocxImporter({
+    expected: (
+      <editor>
+        <hh1>A Test of Headers</hh1>
+        <hh2>Second Level</hh2>
+        <hp>Some plain text.</hp>
+        <hh3>Third level</hh3>
+        <hp>Some more plain text.</hp>
+        <hh4>Fourth level</hh4>
+        <hp>Some more plain text.</hp>
+        <hh5>Fifth level</hh5>
+        <hp>Some more plain text.</hp>
+        <hh6>Sixth level</hh6>
+        <hp>Some more plain text.</hp>
+        <hp>Seventh level</hp>
+        <hp>
+          Since no Heading 7 style exists in styles.xml, this gets converted to
+          Span.
+        </hp>
+      </editor>
+    ),
+    filename: name,
+  });
+});
diff --git a/packages/docx-io/src/lib/__tests__/inline_formatting.spec.tsx b/packages/docx-io/src/lib/__tests__/inline_formatting.spec.tsx
@@ -0,0 +1,40 @@
+/** @jsx jsx */
+
+import { jsx } from '@platejs/test-utils';
+
+import { getDocxTestName, testDocxImporter } from './testDocxImporter';
+
+jsx;
+
+const name = 'inline_formatting';
+
+// Import test for the `inline_formatting` fixture.
+// mammoth output: bold=<strong>, italic=<em>, strikethrough=<s>, sup/sub preserved
+// NOTE: underline is lost by mammoth
+// "Small Caps" is expected as unmarked text and the line break as a literal '\n'.
+describe(getDocxTestName(name), () => {
+  testDocxImporter({
+    expected: (
+      <editor>
+        <hp>
+          Regular text <htext italic>italics</htext> <htext bold>bold </htext>
+          <htext bold italic>
+            bold italics
+          </htext>
+          .
+        </hp>
+        <hp>
+          This is Small Caps, and this is{' '}
+          <htext strikethrough>strikethrough</htext>.
+        </hp>
+        <hp>
+          Some people use single underlines for <htext italic>emphasis</htext>.
+        </hp>
+        <hp>
+          Above the line is <htext superscript>superscript</htext> and below the
+          line is <htext subscript>subscript</htext>.
+        </hp>
+        <hp>A line{`\n`}break.</hp>
+      </editor>
+    ),
+    filename: name,
+  });
+});
diff --git a/packages/docx-io/src/lib/__tests__/links.spec.tsx b/packages/docx-io/src/lib/__tests__/links.spec.tsx
@@ -0,0 +1,51 @@
+/** @jsx jsx */
+
+import { jsx } from '@platejs/test-utils';
+
+import { getDocxTestName, testDocxImporter } from './testDocxImporter';
+
+jsx;
+
+const name = 'links';
+
+// Import test for the `links` fixture.
+// mammoth output: links preserved with target="_blank"
+// Covers two external URLs (one with a #fragment) and two internal anchors
+// (#_A_section_for, #my_bookmark); all four carry target="_blank".
+describe(getDocxTestName(name), () => {
+  testDocxImporter({
+    expected: (
+      <editor>
+        <hh2>An internal link and an external link</hh2>
+        <hp>
+          An{' '}
+          <ha url="http://google.com" target="_blank">
+            external link
+          </ha>{' '}
+          to a popular website.
+        </hp>
+        <hp>
+          An{' '}
+          <ha url="http://pandoc.org/README.html#synopsis" target="_blank">
+            external link
+          </ha>{' '}
+          to a website with an anchor.
+        </hp>
+        <hp>
+          An{' '}
+          <ha url="#_A_section_for" target="_blank">
+            internal link
+          </ha>{' '}
+          to a section header.
+        </hp>
+        <hp>
+          An{' '}
+          <ha url="#my_bookmark" target="_blank">
+            internal link
+          </ha>{' '}
+          to a bookmark.
+        </hp>
+        <hh2>A section for testing link targets</hh2>
+        <hp>A bookmark right here</hp>
+      </editor>
+    ),
+    filename: name,
+  });
+});
diff --git a/packages/docx-io/src/lib/__tests__/lists.spec.tsx b/packages/docx-io/src/lib/__tests__/lists.spec.tsx
@@ -0,0 +1,65 @@
+/** @jsx jsx */
+
+import { jsx } from '@platejs/test-utils';
+
+import { getDocxTestName, testDocxImporter } from './testDocxImporter';
+
+jsx;
+
+const name = 'lists';
+
+// Import test for the `lists` fixture; currently skipped (describe.skip).
+// TODO: ListPlugin uses indent-based lists, not nested structure
+// mammoth output: ol/ul preserved with nesting
+// The expected tree below is written in the nested hol/hul/hli/hlic shape.
+describe.skip(getDocxTestName(name), () => {
+  testDocxImporter({
+    expected: (
+      <editor>
+        <hh2>Some nested lists</hh2>
+        <hol>
+          <hli>
+            <hlic>one</hlic>
+          </hli>
+          <hli>
+            <hlic>two</hlic>
+            <hol>
+              <hli>
+                <hlic>a</hlic>
+              </hli>
+              <hli>
+                <hlic>b</hlic>
+              </hli>
+            </hol>
+          </hli>
+        </hol>
+        <hul>
+          <hli>
+            <hlic>one</hlic>
+          </hli>
+          <hli>
+            <hlic>two</hlic>
+            <hul>
+              <hli>
+                <hlic>three</hlic>
+                <hul>
+                  <hli>
+                    <hlic>four</hlic>
+                  </hli>
+                </hul>
+              </hli>
+            </hul>
+          </hli>
+        </hul>
+        <hp>Sub paragraph</hp>
+        <hul>
+          <hli>
+            <hlic>Same list</hlic>
+          </hli>
+          <hli>
+            <hlic>Different list adjacent to the one above.</hlic>
+          </hli>
+        </hul>
+      </editor>
+    ),
+    filename: name,
+  });
+});
diff --git a/packages/docx-io/src/lib/__tests__/roundtrip.spec.tsx b/packages/docx-io/src/lib/__tests__/roundtrip.spec.tsx
@@ -0,0 +1,146 @@
+/** @jsx jsx */
+
+import fs from 'node:fs';
+import path from 'node:path';
+
+import { cleanDocx } from '@platejs/docx';
+import type { SlatePlugin, TNode, Value } from 'platejs';
+import { createSlateEditor } from 'platejs';
+import { serializeHtml } from 'platejs/static';
+import { jsx } from '@platejs/test-utils';
+import mammoth from 'mammoth';
+import { BaseEditorKit } from 'www/src/registry/components/editor/editor-base-kit';
+import { DocxExportKit } from 'www/src/registry/components/editor/plugins/docx-export-kit';
+
+import { htmlToDocxBlob } from '../html-to-docx';
+import { preprocessMammothHtml } from '../preprocessMammothHtml';
+
+jsx;
+
+// Plugin list shared by every editor in this file: base kit plus DOCX export kit.
+const editorPlugins = [...BaseEditorKit, ...DocxExportKit] as SlatePlugin[];
+
+/** Creates a Slate editor with the shared plugins and an optional initial value. */
+function createTestEditor(value?: Value) {
+  return createSlateEditor({ plugins: editorPlugins, value });
+}
+
+/**
+ * Reads `<filename>.docx` from the fixture directory, which is resolved
+ * relative to this directory (`../../../../docx/src/lib/__tests__`).
+ */
+const readDocxFixture = (filename: string): Buffer =>
+  fs.readFileSync(
+    path.join(
+      path.resolve(__dirname, '../../../../docx/src/lib/__tests__'),
+      `${filename}.docx`
+    )
+  );
+
+/**
+ * Imports a .docx buffer into editor nodes.
+ *
+ * Pipeline: mammoth HTML → preprocessMammothHtml → cleanDocx → parsed DOM body
+ * → the editor's HTML deserializer.
+ */
+const importDocxBuffer = async (
+  editor: ReturnType<typeof createTestEditor>,
+  buffer: Buffer
+): Promise<TNode[]> => {
+  const { value: rawHtml } = await mammoth.convertToHtml(
+    { buffer },
+    { styleMap: ['comment-reference => sup'] }
+  );
+
+  const cleaned = cleanDocx(preprocessMammothHtml(rawHtml).html, '');
+  const { body } = new DOMParser().parseFromString(cleaned, 'text/html');
+
+  return editor.api.html.deserialize({ element: body }) as TNode[];
+};
+
+/**
+ * Exports editor nodes to a .docx Buffer.
+ *
+ * Pipeline: editor created from the nodes → serializeHtml (platejs/static) →
+ * htmlToDocxBlob → Buffer, so the result can be fed straight back to the importer.
+ */
+const exportNodesToDocx = async (nodes: TNode[]): Promise<Buffer> => {
+  // The serializing editor is built with the same shared plugin list.
+  const html = await serializeHtml(createTestEditor(nodes as Value));
+
+  const docxBlob = await htmlToDocxBlob(html);
+  const bytes = await docxBlob.arrayBuffer();
+
+  return Buffer.from(bytes);
+};
+
+/**
+ * Roundtrip suite: import → export → reimport.
+ *
+ *   original .docx → import → nodes (B) → export → new .docx → reimport → nodes (D)
+ *
+ * The export is treated as lossless for a fixture when D deep-equals B, i.e.
+ * nothing the importer can parse was dropped on the way out.
+ *
+ * Known roundtrip limitations:
+ * - inline_formatting: some marks are lost in HTML serialization/deserialization
+ * - line breaks (\n) may come back as spaces
+ */
+describe('docx roundtrip', () => {
+  // Runs one fixture through the whole cycle on a single editor and returns
+  // the nodes from the first import (B) and from the reimport (D).
+  const runRoundtrip = async (fixture: string) => {
+    const editor = createTestEditor();
+    const nodesB = await importDocxBuffer(editor, readDocxFixture(fixture));
+    const exported = await exportNodesToDocx(nodesB);
+    const nodesD = await importDocxBuffer(editor, exported);
+
+    return { nodesB, nodesD };
+  };
+
+  // Appends a trailing slash to domain-only http(s) URLs in the serialized
+  // nodes so that "http://host" and "http://host/" compare equal.
+  const withNormalizedUrls = (nodes: TNode[]) =>
+    JSON.parse(
+      JSON.stringify(nodes).replaceAll(
+        /"url":"(https?:\/\/[^"/]+)"/g,
+        '"url":"$1/"'
+      )
+    );
+
+  // Fixtures whose reimported nodes must match the first import exactly.
+  // 'links' is handled separately because of the trailing-slash normalization.
+  for (const name of ['headers', 'block_quotes', 'tables']) {
+    it(`should preserve data for ${name}`, async () => {
+      const { nodesB, nodesD } = await runRoundtrip(name);
+
+      expect(nodesD).toEqual(nodesB);
+    });
+  }
+
+  // Links survive the roundtrip apart from a trailing slash on bare domains.
+  it('should preserve data for links (with URL normalization)', async () => {
+    const { nodesB, nodesD } = await runRoundtrip('links');
+
+    expect(withNormalizedUrls(nodesD)).toEqual(withNormalizedUrls(nodesB));
+  });
+
+  // inline_formatting only has to make it through the cycle with some content;
+  // equality is not asserted because of the known mark / line-break loss.
+  it('should export and reimport inline_formatting (with known loss)', async () => {
+    const { nodesD } = await runRoundtrip('inline_formatting');
+
+    expect(nodesD.length).toBeGreaterThan(0);
+  });
+});
diff --git a/packages/docx-io/src/lib/__tests__/tables.spec.tsx b/packages/docx-io/src/lib/__tests__/tables.spec.tsx
diff --git a/packages/docx-io/src/lib/__tests__/testDocxImporter.tsx b/packages/docx-io/src/lib/__tests__/testDocxImporter.tsx