Skip to content

Commit 8b6a737

Browse files
committed
test
1 parent 9bacb63 commit 8b6a737

File tree

8 files changed

+593
-0
lines changed

8 files changed

+593
-0
lines changed
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/** @jsx jsx */
2+
3+
import { jsx } from '@platejs/test-utils';
4+
5+
import { getDocxTestName, testDocxImporter } from './testDocxImporter';
6+
7+
jsx;
8+
9+
const name = 'block_quotes';
10+
11+
// mammoth output: blockquote style NOT preserved (becomes p), italic lost
12+
// NOTE: mammoth doesn't recognize Word's blockquote style
13+
// Uses smart quotes (U+2019) from Word
14+
describe(getDocxTestName(name), () => {
  // Expected editor value for the fixture: one heading followed by plain
  // paragraphs only. The apostrophe in "don't" is written as U+2019.
  const expected = (
    <editor>
      <hh2>Some block quotes, in different ways</hh2>
      <hp>This is the proper way, with a style</hp>
      <hp>
        I don{'\u2019'}t know why this would be in italics, but so it appears
        to be on my screen.
      </hp>
      <hp>And this is the way that most people do it:</hp>
      <hp>
        I just indented this, so it looks like a block quote. I think this is
        how most people do block quotes in their documents.
      </hp>
      <hp>And back to the normal style.</hp>
    </editor>
  );

  testDocxImporter({ expected, filename: name });
});
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/** @jsx jsx */
2+
3+
import { jsx } from '@platejs/test-utils';
4+
5+
import { getDocxTestName, testDocxImporter } from './testDocxImporter';
6+
7+
jsx;
8+
9+
const name = 'headers';
10+
11+
// mammoth output: h1-h6 preserved, no font styles
12+
describe(getDocxTestName(name), () => {
  // Expected editor value: heading levels 1-6 map to <hh1>..<hh6>, while the
  // "Seventh level" line is expected as an ordinary paragraph.
  const expected = (
    <editor>
      <hh1>A Test of Headers</hh1>
      <hh2>Second Level</hh2>
      <hp>Some plain text.</hp>
      <hh3>Third level</hh3>
      <hp>Some more plain text.</hp>
      <hh4>Fourth level</hh4>
      <hp>Some more plain text.</hp>
      <hh5>Fifth level</hh5>
      <hp>Some more plain text.</hp>
      <hh6>Sixth level</hh6>
      <hp>Some more plain text.</hp>
      <hp>Seventh level</hp>
      <hp>
        Since no Heading 7 style exists in styles.xml, this gets converted to
        Span.
      </hp>
    </editor>
  );

  testDocxImporter({ expected, filename: name });
});
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/** @jsx jsx */
2+
3+
import { jsx } from '@platejs/test-utils';
4+
5+
import { getDocxTestName, testDocxImporter } from './testDocxImporter';
6+
7+
jsx;
8+
9+
const name = 'inline_formatting';
10+
11+
// mammoth output: bold=<strong>, italic=<em>, strikethrough=<s>, sup/sub preserved
12+
// NOTE: underline is lost by mammoth
13+
describe(getDocxTestName(name), () => {
  // Expected editor value: italic, bold, strikethrough, superscript and
  // subscript marks appear on <htext> leaves; the last paragraph carries a
  // literal "\n" for the line break.
  const expected = (
    <editor>
      <hp>
        Regular text <htext italic>italics</htext> <htext bold>bold </htext>
        <htext bold italic>
          bold italics
        </htext>
        .
      </hp>
      <hp>
        This is Small Caps, and this is{' '}
        <htext strikethrough>strikethrough</htext>.
      </hp>
      <hp>
        Some people use single underlines for <htext italic>emphasis</htext>.
      </hp>
      <hp>
        Above the line is <htext superscript>superscript</htext> and below the
        line is <htext subscript>subscript</htext>.
      </hp>
      <hp>A line{`\n`}break.</hp>
    </editor>
  );

  testDocxImporter({ expected, filename: name });
});
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/** @jsx jsx */
2+
3+
import { jsx } from '@platejs/test-utils';
4+
5+
import { getDocxTestName, testDocxImporter } from './testDocxImporter';
6+
7+
jsx;
8+
9+
const name = 'links';
10+
11+
// mammoth output: links preserved with target="_blank"
12+
describe(getDocxTestName(name), () => {
  // Expected editor value: two external and two internal (fragment) links,
  // each expected as an <ha> element with target="_blank".
  const expected = (
    <editor>
      <hh2>An internal link and an external link</hh2>
      <hp>
        An{' '}
        <ha url="http://google.com" target="_blank">
          external link
        </ha>{' '}
        to a popular website.
      </hp>
      <hp>
        An{' '}
        <ha url="http://pandoc.org/README.html#synopsis" target="_blank">
          external link
        </ha>{' '}
        to a website with an anchor.
      </hp>
      <hp>
        An{' '}
        <ha url="#_A_section_for" target="_blank">
          internal link
        </ha>{' '}
        to a section header.
      </hp>
      <hp>
        An{' '}
        <ha url="#my_bookmark" target="_blank">
          internal link
        </ha>{' '}
        to a bookmark.
      </hp>
      <hh2>A section for testing link targets</hh2>
      <hp>A bookmark right here</hp>
    </editor>
  );

  testDocxImporter({ expected, filename: name });
});
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/** @jsx jsx */
2+
3+
import { jsx } from '@platejs/test-utils';
4+
5+
import { getDocxTestName, testDocxImporter } from './testDocxImporter';
6+
7+
jsx;
8+
9+
const name = 'lists';
10+
11+
// TODO: suite is skipped for now. ListPlugin uses indent-based lists, whereas the expected value below is a nested ol/ul structure.
12+
// mammoth output: ol/ul preserved with nesting
13+
describe.skip(getDocxTestName(name), () => {
  // Expected editor value written as nested <hol>/<hul> lists: an ordered list
  // with one nested level, an unordered list nested three levels deep, a
  // paragraph, and a final flat unordered list.
  const expected = (
    <editor>
      <hh2>Some nested lists</hh2>
      <hol>
        <hli>
          <hlic>one</hlic>
        </hli>
        <hli>
          <hlic>two</hlic>
          <hol>
            <hli>
              <hlic>a</hlic>
            </hli>
            <hli>
              <hlic>b</hlic>
            </hli>
          </hol>
        </hli>
      </hol>
      <hul>
        <hli>
          <hlic>one</hlic>
        </hli>
        <hli>
          <hlic>two</hlic>
          <hul>
            <hli>
              <hlic>three</hlic>
              <hul>
                <hli>
                  <hlic>four</hlic>
                </hli>
              </hul>
            </hli>
          </hul>
        </hli>
      </hul>
      <hp>Sub paragraph</hp>
      <hul>
        <hli>
          <hlic>Same list</hlic>
        </hli>
        <hli>
          <hlic>Different list adjacent to the one above.</hlic>
        </hli>
      </hul>
    </editor>
  );

  testDocxImporter({ expected, filename: name });
});
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
/** @jsx jsx */
2+
3+
import fs from 'node:fs';
4+
import path from 'node:path';
5+
6+
import { cleanDocx } from '@platejs/docx';
7+
import type { SlatePlugin, TNode, Value } from 'platejs';
8+
import { createSlateEditor } from 'platejs';
9+
import { serializeHtml } from 'platejs/static';
10+
import { jsx } from '@platejs/test-utils';
11+
import mammoth from 'mammoth';
12+
import { BaseEditorKit } from 'www/src/registry/components/editor/editor-base-kit';
13+
import { DocxExportKit } from 'www/src/registry/components/editor/plugins/docx-export-kit';
14+
15+
import { htmlToDocxBlob } from '../html-to-docx';
16+
import { preprocessMammothHtml } from '../preprocessMammothHtml';
17+
18+
jsx;
19+
20+
// Plugin set shared by every editor in this file: base kit plus DOCX export kit.
const editorPlugins = [...BaseEditorKit, ...DocxExportKit] as SlatePlugin[];

/**
 * Builds a Slate editor configured with `editorPlugins`.
 *
 * @param value - Optional initial document; passed through unchanged.
 * @returns The editor returned by `createSlateEditor`.
 */
const createTestEditor = (value?: Value) => {
  return createSlateEditor({ plugins: editorPlugins, value });
};
27+
28+
/**
 * Synchronously reads a `.docx` fixture into memory.
 *
 * @param filename - Fixture base name without the `.docx` extension.
 * @returns The raw file contents.
 * @throws Whatever `fs.readFileSync` throws, e.g. when the file is missing.
 */
const readDocxFixture = (filename: string): Buffer =>
  fs.readFileSync(
    path.join(
      // Fixture directory, resolved relative to this test file.
      path.resolve(__dirname, '../../../../docx/src/lib/__tests__'),
      `${filename}.docx`
    )
  );
37+
38+
/**
 * Converts a DOCX buffer into editor nodes.
 *
 * Steps: mammoth converts the buffer to HTML, `preprocessMammothHtml` and
 * `cleanDocx` post-process that HTML, and the parsed `<body>` is handed to
 * the editor's HTML deserializer.
 *
 * @param editor - Editor whose `api.html.deserialize` performs the conversion.
 * @param buffer - Contents of the `.docx` file.
 * @returns The deserialized nodes.
 */
const importDocxBuffer = async (
  editor: ReturnType<typeof createTestEditor>,
  buffer: Buffer
): Promise<TNode[]> => {
  const { value: rawHtml } = await mammoth.convertToHtml(
    { buffer },
    { styleMap: ['comment-reference => sup'] }
  );

  const { html } = preprocessMammothHtml(rawHtml);
  const { body } = new DOMParser().parseFromString(
    cleanDocx(html, ''),
    'text/html'
  );

  return editor.api.html.deserialize({ element: body }) as TNode[];
};
54+
55+
/**
 * Serializes editor nodes to a DOCX file held in memory.
 *
 * @param nodes - Nodes used as the value of a fresh test editor.
 * @returns The generated document as a Buffer, ready to be re-imported.
 */
const exportNodesToDocx = async (nodes: TNode[]): Promise<Buffer> => {
  // Nodes -> HTML, via a fresh editor carrying the shared plugin set.
  const html = await serializeHtml(createTestEditor(nodes as Value));

  // HTML -> DOCX blob -> Buffer.
  const blob = await htmlToDocxBlob(html);

  return Buffer.from(await blob.arrayBuffer());
};
70+
71+
/**
 * Roundtrip suite: import → export → reimport.
 *
 * Pipeline under test:
 * original .docx → import → Plate nodes (B) → export → new .docx → reimport → Plate nodes (D)
 *
 * The roundtrip preserves data when B === D, i.e. export loses nothing that
 * import is able to parse.
 *
 * Known roundtrip limitations:
 * - inline_formatting: some marks are lost through HTML serialization/deserialization
 * - Line breaks (\n) may be converted to spaces
 */
describe('docx roundtrip', () => {
  /** Runs the full pipeline for one fixture and returns both node sets. */
  const runRoundtrip = async (fixture: string) => {
    const editor = createTestEditor();

    // 1. Import the original .docx
    const nodesB = await importDocxBuffer(editor, readDocxFixture(fixture));

    // 2. Export to a new .docx, then 3. reimport it with the same editor
    const exported = await exportNodesToDocx(nodesB);
    const nodesD = await importDocxBuffer(editor, exported);

    return { nodesB, nodesD };
  };

  // Fixtures expected to survive the roundtrip unchanged (B === D).
  // 'links' is covered separately because of trailing-slash URL normalization.
  for (const fixture of ['headers', 'block_quotes', 'tables']) {
    it(`should preserve data for ${fixture}`, async () => {
      const { nodesB, nodesD } = await runRoundtrip(fixture);

      // Both node sets should be identical
      expect(nodesD).toEqual(nodesB);
    });
  }

  // Links roundtrip, compared after normalizing domain-only URLs
  it('should preserve data for links (with URL normalization)', async () => {
    const { nodesB, nodesD } = await runRoundtrip('links');

    // Appends a trailing slash to domain-only URLs on both sides before comparing
    const normalizeUrls = (nodes: TNode[]) => {
      const serialized = JSON.stringify(nodes);

      return JSON.parse(
        serialized.replaceAll(/"url":"(https?:\/\/[^"/]+)"/g, '"url":"$1/"')
      );
    };

    expect(normalizeUrls(nodesD)).toEqual(normalizeUrls(nodesB));
  });

  // inline_formatting is only checked for a non-empty result (known data loss)
  it('should export and reimport inline_formatting (with known loss)', async () => {
    // Reaching the assertion means import, export and reimport did not throw
    const { nodesD } = await runRoundtrip('inline_formatting');

    // Equality with nodesB is not asserted because of mark/linebreak loss
    expect(nodesD.length).toBeGreaterThan(0);
  });
});

0 commit comments

Comments
 (0)