diff --git a/packages/amis-core/src/utils/debug.tsx b/packages/amis-core/src/utils/debug.tsx index 97f3f0a82..5364e060e 100644 --- a/packages/amis-core/src/utils/debug.tsx +++ b/packages/amis-core/src/utils/debug.tsx @@ -385,6 +385,7 @@ export function enableDebug() { interface DebugWrapperProps { renderer: any; + children?: React.ReactNode; } export class DebugWrapper extends Component { diff --git a/packages/office-viewer/README.md b/packages/office-viewer/README.md index 08d544a03..b08af991c 100644 --- a/packages/office-viewer/README.md +++ b/packages/office-viewer/README.md @@ -6,6 +6,10 @@ docx 渲染器,原理是将 docx 里的 xml 格式转成 html 相对于 Canvas 渲染,这个实现方案比较简单,最终页面也可以很方便复制,但无法保证和原始 docx 文件展现一致,因为有部分功能难以在 HTML 中实现,比如图文环绕效果。 +## 还不支持的功能 + +- wmf,需要使用 https://github.com/SheetJS/js-wmf + ## 参考资料 - [官方规范](https://www.ecma-international.org/publications-and-standards/standards/ecma-376/) diff --git a/packages/office-viewer/__tests__/docx/simple/text.docx b/packages/office-viewer/__tests__/docx/simple/text.docx new file mode 100644 index 000000000..ed371f9e8 Binary files /dev/null and b/packages/office-viewer/__tests__/docx/simple/text.docx differ diff --git a/packages/office-viewer/__tests__/util/autoSpace.test.ts b/packages/office-viewer/__tests__/util/autoSpace.test.ts new file mode 100644 index 000000000..1b9918afa --- /dev/null +++ b/packages/office-viewer/__tests__/util/autoSpace.test.ts @@ -0,0 +1,9 @@ +import {cjkspace} from '../../src/util/autoSpace'; + +test('autoSpace', async () => { + expect(cjkspace('a中'.split(''))).toBe('a 中'); +}); + +test('autoSpace 2', async () => { + expect(cjkspace('abc中def,测试'.split(''))).toBe('abc 中 def,测试'); +}); diff --git a/packages/office-viewer/src/openxml/word/AlternateContent.ts b/packages/office-viewer/src/openxml/word/AlternateContent.ts new file mode 100644 index 000000000..e69de29bb diff --git a/packages/office-viewer/src/openxml/word/Body.ts b/packages/office-viewer/src/openxml/word/Body.ts index 37b8b1a41..df9265400 100644 --- 
a/packages/office-viewer/src/openxml/word/Body.ts +++ b/packages/office-viewer/src/openxml/word/Body.ts @@ -52,6 +52,9 @@ export class Body { body.addChild(table); break; + case 'w:bookmarkEnd': + break; + default: console.warn('Body.fromXML Unknown key', tagName, child); } diff --git a/packages/office-viewer/src/openxml/word/Paragraph.ts b/packages/office-viewer/src/openxml/word/Paragraph.ts index 6e605b157..a9074c5a8 100644 --- a/packages/office-viewer/src/openxml/word/Paragraph.ts +++ b/packages/office-viewer/src/openxml/word/Paragraph.ts @@ -21,6 +21,11 @@ export interface ParagraphPr extends Properties { numPr?: NumberPr; runPr?: RunPr; tabs?: Tab[]; + + /** + * 其实是区分 autoSpaceDN 和 autoSpaceDE 的,但这里简化了 + */ + autoSpace?: boolean; } export type ParagraphChild = @@ -45,6 +50,12 @@ export type ParagraphChild = // | CommentRangeEnd // | CommentReference; +function parseAutoSpace(element: Element): boolean { + const autoSpaceDE = element.getElementsByTagName('w:autoSpaceDE').item(0); + const autoSpaceDN = element.getElementsByTagName('w:autoSpaceDN').item(0); + return !!autoSpaceDE || !!autoSpaceDN; +} + export class Paragraph { // 主要是为了方便调试用的 paraId?: string; @@ -78,7 +89,9 @@ export class Paragraph { tabs.push(Tab.fromXML(word, tabElement)); } - return {cssStyle, pStyle, numPr, tabs}; + const autoSpace = parseAutoSpace(element); + + return {cssStyle, pStyle, numPr, tabs, autoSpace}; } static fromXML(word: Word, element: Element): Paragraph { diff --git a/packages/office-viewer/src/openxml/word/Table.ts b/packages/office-viewer/src/openxml/word/Table.ts index 1f1d19bd6..1d160cf1f 100644 --- a/packages/office-viewer/src/openxml/word/Table.ts +++ b/packages/office-viewer/src/openxml/word/Table.ts @@ -76,7 +76,8 @@ function parseTblJc(element: Element, cssStyle: CSSStyle) { switch (val) { case 'left': case 'start': - cssStyle['float'] = 'left'; + // TODO: 会导致前面的文字掉下去,感觉还是不能支持这个功能 + // cssStyle['float'] = 'left'; break; case 'right': case 'end': @@ -165,16 +166,26 @@ 
function parseTblLook(child: Element) { * http://webapp.docx4java.org/OnlineDemo/ecma376/WordML/tblpPr.html * 只支持部分 */ -function parsetTlpPr(child: Element, style: CSSStyle) { - const topFromText = parseSize(child, 'w:topFromText'); - const bottomFromText = parseSize(child, 'w:bottomFromText'); - const rightFromText = parseSize(child, 'w:rightFromText'); - const leftFromText = parseSize(child, 'w:leftFromText'); - style['float'] = 'left'; - style['margin-bottom'] = addSize(style['margin-bottom'], bottomFromText); - style['margin-left'] = addSize(style['margin-left'], leftFromText); - style['margin-right'] = addSize(style['margin-right'], rightFromText); - style['margin-top'] = addSize(style['margin-top'], topFromText); +function parsetTlpPr(word: Word, child: Element, style: CSSStyle) { + // 如果设置 padding 会导致绝对定位不准确,所以一旦设置就不支持 + if (typeof word.renderOptions.padding === 'undefined') { + const tplpX = parseSize(child, 'w:tblpX'); + const tplpY = parseSize(child, 'w:tblpY'); + style.position = 'absolute'; + style.top = tplpY; + style.left = tplpX; + } + + // 之前想用 float 来实现,但是会导致文字掉下去 + // const topFromText = parseSize(child, 'w:topFromText'); + // const bottomFromText = parseSize(child, 'w:bottomFromText'); + // const rightFromText = parseSize(child, 'w:rightFromText'); + // const leftFromText = parseSize(child, 'w:leftFromText'); + // style['float'] = 'left'; + // style['margin-bottom'] = addSize(style['margin-bottom'], bottomFromText); + // style['margin-left'] = addSize(style['margin-left'], leftFromText); + // style['margin-right'] = addSize(style['margin-right'], rightFromText); + // style['margin-top'] = addSize(style['margin-top'], topFromText); } export class Table { @@ -257,7 +268,7 @@ export class Table { break; case 'w:tblpPr': - parsetTlpPr(child, tableStyle); + parsetTlpPr(word, child, tableStyle); break; default: diff --git a/packages/office-viewer/src/parse/parsePr.ts b/packages/office-viewer/src/parse/parsePr.ts index 209cebed9..86d44e9d4 100644 --- 
a/packages/office-viewer/src/parse/parsePr.ts +++ b/packages/office-viewer/src/parse/parsePr.ts @@ -343,6 +343,29 @@ export function parsePr(word: Word, element: Element, type: 'r' | 'p' = 'p') { // 目前是自动计算的,所以不需要这个了 break; + case 'w:bidi': + // http://webapp.docx4java.org/OnlineDemo/ecma376/WordML/bidi_1.html + // TODO: 还不清楚和 w:textDirection 是什么关系 + if (getValBoolean(child, true)) { + console.warn('w:bidi is not supported.'); + } + break; + + case 'w:autoSpaceDE': + case 'w:autoSpaceDN': + // 这个在其它地方实现了 + break; + + case 'w:kinsoku': + // http://webapp.docx4java.org/OnlineDemo/ecma376/WordML/kinsoku.html + // 控制不了所以忽略了 + break; + + case 'w:overflowPunct': + // http://webapp.docx4java.org/OnlineDemo/ecma376/WordML/overflowPunct.html + // 支持不了 + break; + default: console.warn('parsePr Unknown tagName', tagName, child); } diff --git a/packages/office-viewer/src/render/renderHyperLink.ts b/packages/office-viewer/src/render/renderHyperLink.ts index f3457d247..c386bdc24 100644 --- a/packages/office-viewer/src/render/renderHyperLink.ts +++ b/packages/office-viewer/src/render/renderHyperLink.ts @@ -3,11 +3,16 @@ import {appendChild, createElement} from '../util/dom'; import Word from '../Word'; import {Run} from '../openxml/word/Run'; import renderRun from './renderRun'; +import type {Paragraph} from '../openxml/word/Paragraph'; /** * 渲染链接 */ -export function renderHyperLink(word: Word, hyperlink: Hyperlink): HTMLElement { +export function renderHyperLink( + word: Word, + hyperlink: Hyperlink, + paragraph?: Paragraph +): HTMLElement { const a = createElement('a') as HTMLAnchorElement; if (hyperlink.relation) { @@ -24,7 +29,7 @@ export function renderHyperLink(word: Word, hyperlink: Hyperlink): HTMLElement { for (const child of hyperlink.children) { if (child instanceof Run) { - const span = renderRun(word, child); + const span = renderRun(word, child, paragraph); appendChild(a, span); } } diff --git a/packages/office-viewer/src/render/renderNumbering.ts 
b/packages/office-viewer/src/render/renderNumbering.ts index 6e817e2c5..56db7c936 100644 --- a/packages/office-viewer/src/render/renderNumbering.ts +++ b/packages/office-viewer/src/render/renderNumbering.ts @@ -78,6 +78,11 @@ export function renderNumbering( return null; } + if (!numbering) { + console.warn('renderNumbering: numbering is empty'); + return null; + } + const num = numbering.nums[numId]; if (!num) { diff --git a/packages/office-viewer/src/render/renderParagraph.ts b/packages/office-viewer/src/render/renderParagraph.ts index 280ecf9f0..237963a11 100644 --- a/packages/office-viewer/src/render/renderParagraph.ts +++ b/packages/office-viewer/src/render/renderParagraph.ts @@ -45,11 +45,11 @@ export default function renderParagraph( for (const child of paragraph.children) { if (child instanceof Run) { - appendChild(p, renderRun(word, child)); + appendChild(p, renderRun(word, child, paragraph)); } else if (child instanceof BookmarkStart) { appendChild(p, renderBookmarkStart(word, child)); } else if (child instanceof Hyperlink) { - const hyperlink = renderHyperLink(word, child); + const hyperlink = renderHyperLink(word, child, paragraph); appendChild(p, hyperlink); } else if (child instanceof SmartTag) { renderInlineText(word, child, p); diff --git a/packages/office-viewer/src/render/renderRun.ts b/packages/office-viewer/src/render/renderRun.ts index 4fd091ebe..3503c7c4e 100644 --- a/packages/office-viewer/src/render/renderRun.ts +++ b/packages/office-viewer/src/render/renderRun.ts @@ -20,16 +20,27 @@ import {InstrText} from '../openxml/word/InstrText'; import {renderInstrText} from './renderInstrText'; import {Sym} from '../openxml/word/Sym'; import {renderSym} from './renderSym'; +import {cjkspace} from '../util/autoSpace'; +import type {Paragraph} from './../openxml/word/Paragraph'; const VARIABLE_CLASS_NAME = 'variable'; /** * 对文本进行替换 */ -function renderText(span: HTMLElement, word: Word, text: string) { +function renderText( + span: HTMLElement, + word: 
Word, + text: string, + paragraph?: Paragraph +) { // 简单过滤一下提升性能 if (text.indexOf('{{') === -1) { - span.textContent = text; + if (paragraph?.properties?.autoSpace) { + span.textContent = cjkspace(text.split('')); + } else { + span.textContent = text; + } } else { span.dataset.originText = text; // 加个标识,后续可以通过它来查找哪些变量需要替换,这样就不用重新渲染整个文档了 @@ -53,7 +64,7 @@ export function updateVariableText(word: Word) { /** * 渲染 run 节点 */ -export default function renderRun(word: Word, run: Run) { +export default function renderRun(word: Word, run: Run, paragraph?: Paragraph) { const span = createElement('span'); word.addClass(span, 'r'); @@ -62,12 +73,12 @@ export default function renderRun(word: Word, run: Run) { if (run.children.length === 1 && run.children[0] instanceof Text) { const text = run.children[0] as Text; - renderText(span, word, text.text); + renderText(span, word, text.text, paragraph); } else { for (const child of run.children) { if (child instanceof Text) { let newSpan = createElement('span'); - renderText(span, word, child.text); + renderText(span, word, child.text, paragraph); appendChild(span, newSpan); } else if (child instanceof Break) { const br = renderBr(child); diff --git a/packages/office-viewer/src/render/renderSection.ts b/packages/office-viewer/src/render/renderSection.ts index dec83efd9..1e9ad7054 100644 --- a/packages/office-viewer/src/render/renderSection.ts +++ b/packages/office-viewer/src/render/renderSection.ts @@ -8,6 +8,8 @@ import Word from '../Word'; export function renderSection(word: Word, section: Section) { const sectionEl = createElement('section') as HTMLElement; + // 用于后续绝对定位 + sectionEl.style.position = 'relative'; const props = section.properties; const pageSize = props.pageSize; if (pageSize) { diff --git a/packages/office-viewer/src/util/autoSpace.ts b/packages/office-viewer/src/util/autoSpace.ts new file mode 100644 index 000000000..f43ca662c --- /dev/null +++ b/packages/office-viewer/src/util/autoSpace.ts @@ -0,0 +1,41 @@ +/** + * 
/**
 * Automatic spacing between CJK and non-CJK text, adapted from the gist below
 * with the lodash dependency removed:
 * https://gist.github.com/wyl8899/e0f31068681023480e20c34f6b19a275
 *
 * Partial implementation of the rules described in
 * https://zhuanlan.zhihu.com/p/33612593
 */

/* Punctuation */
const punctuationRegex = /\p{Punctuation}/u;
/* Whitespace / separators */
const spaceRegex = /\p{Separator}/u;
/* CJK characters (Chinese, Japanese, Korean) */
const cjkRegex =
  /\p{Script=Han}|\p{Script=Katakana}|\p{Script=Hiragana}|\p{Script=Hangul}/u;

/**
 * Decides whether a space belongs between two adjacent parts.
 *
 * A space is wanted when exactly one side is CJK and the other side is
 * neither punctuation nor whitespace. The relation is symmetric, so the
 * argument order does not affect the result.
 */
const shouldSpace = (a: string, b: string): boolean => {
  if (cjkRegex.test(a)) {
    return !(
      punctuationRegex.test(b) ||
      spaceRegex.test(b) ||
      cjkRegex.test(b)
    );
  }
  return cjkRegex.test(b) && !punctuationRegex.test(a) && !spaceRegex.test(a);
};

/** True when `s` is a single UTF-16 code unit in the high-surrogate range. */
const isLoneHighSurrogate = (s: string): boolean => {
  if (s.length !== 1) {
    return false;
  }
  const code = s.charCodeAt(0);
  return code >= 0xd800 && code <= 0xdbff;
};

/** True when `s` is a single UTF-16 code unit in the low-surrogate range. */
const isLoneLowSurrogate = (s: string): boolean => {
  if (s.length !== 1) {
    return false;
  }
  const code = s.charCodeAt(0);
  return code >= 0xdc00 && code <= 0xdfff;
};

/**
 * Re-joins surrogate halves that were separated by `str.split('')`.
 *
 * `split('')` cuts on UTF-16 code units, so a supplementary-plane character
 * (for example CJK Extension B) arrives as two lone surrogates. Neither half
 * matches the `\p{Script=...}` classes, which would suppress the space around
 * such characters. Only an adjacent high + low pair is merged; every other
 * part is passed through untouched.
 */
const mergeSurrogatePairs = (parts: string[]): string[] => {
  const merged: string[] = [];
  for (const part of parts) {
    const last = merged.length > 0 ? merged[merged.length - 1] : undefined;
    if (
      last !== undefined &&
      isLoneHighSurrogate(last) &&
      isLoneLowSurrogate(part)
    ) {
      merged[merged.length - 1] = last + part;
    } else {
      merged.push(part);
    }
  }
  return merged;
};

/**
 * Concatenates `parts`, inserting between each neighbouring pair the
 * separator returned by `sepFunc(previous, current)`.
 */
const join = (
  parts: string[],
  sepFunc: (a: string, b: string) => string
): string => {
  let result = '';
  parts.forEach((part, index) => {
    if (index > 0) {
      result += sepFunc(parts[index - 1] as string, part);
    }
    result += part;
  });
  return result;
};

/**
 * Joins the given parts into one string, adding a single space at every
 * boundary between CJK text and non-CJK text that is neither punctuation
 * nor whitespace.
 *
 * @param strings text parts, typically the result of `text.split('')`;
 *   empty and undefined entries are ignored
 * @returns the concatenated text with the automatic spaces inserted
 */
export const cjkspace = (strings: string[]): string => {
  const nonEmpty = strings.filter(c => c !== undefined && c !== '');
  const parts = mergeSurrogatePairs(nonEmpty);
  return join(parts, (previous, current) =>
    shouldSpace(previous, current) ? ' ' : ''
  );
};