chore: office viewer 支持中英文空格;优化 tblpPr 的支持 (#6433)

This commit is contained in:
吴多益 2023-03-21 18:46:12 +08:00 committed by GitHub
parent 7eb20c0a75
commit 3f1e39c560
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 150 additions and 22 deletions

View File

@ -385,6 +385,7 @@ export function enableDebug() {
/** Props accepted by the DebugWrapper component. */
interface DebugWrapperProps {
// Deliberately untyped here. NOTE(review): presumably the renderer instance being wrapped for debugging — confirm against callers.
renderer: any;
// Optional React children.
children?: React.ReactNode;
}
export class DebugWrapper extends Component<DebugWrapperProps> {

View File

@ -6,6 +6,10 @@ docx 渲染器,原理是将 docx 里的 xml 格式转成 html
相对于 Canvas 渲染,这个实现方案比较简单,最终页面也可以很方便复制,但无法保证和原始 docx 文件展现一致,因为有部分功能难以在 HTML 中实现,比如图文环绕效果。
## 还不支持的功能
- wmf需要使用 https://github.com/SheetJS/js-wmf
## 参考资料
- [官方规范](https://www.ecma-international.org/publications-and-standards/standards/ecma-376/)

Binary file not shown.

View File

@ -0,0 +1,9 @@
import {cjkspace} from '../../src/util/autoSpace';
// A Latin character directly followed by a CJK character gets one space between them.
test('autoSpace', () => {
  const characters = Array.from('a中');
  const spaced = cjkspace(characters);
  expect(spaced).toBe('a 中');
});
// Every Latin/CJK boundary receives a space, including the one between
// "def" and "测试"; adjacent CJK characters ("测试") stay joined.
test('autoSpace 2', async () => {
  expect(cjkspace('abc中def测试'.split(''))).toBe('abc 中 def 测试');
});

View File

@ -52,6 +52,9 @@ export class Body {
body.addChild(table);
break;
case 'w:bookmarkEnd':
break;
default:
console.warn('Body.fromXML Unknown key', tagName, child);
}

View File

@ -21,6 +21,11 @@ export interface ParagraphPr extends Properties {
numPr?: NumberPr;
runPr?: RunPr;
tabs?: Tab[];
/**
* autoSpaceDN autoSpaceDE
*/
autoSpace?: boolean;
}
export type ParagraphChild =
@ -45,6 +50,12 @@ export type ParagraphChild =
// | CommentRangeEnd
// | CommentReference;
/**
 * Reports whether automatic spacing (`w:autoSpaceDE` / `w:autoSpaceDN`) is
 * switched on somewhere below `element`.
 *
 * Both tags are on/off toggles: a tag with no `w:val` attribute means "on",
 * while an explicit `w:val` of `0`, `false` or `off` means "off". A tag that
 * is explicitly switched off is therefore not counted as enabled.
 *
 * @param element - element whose descendants are searched for the two tags
 * @returns true when at least one of the two toggles is present and on
 */
function parseAutoSpace(element: Element): boolean {
  const isToggleOn = (tagName: string): boolean => {
    const toggle = element.getElementsByTagName(tagName).item(0);
    if (!toggle) {
      return false;
    }
    // A missing (null or empty) w:val means the toggle is on.
    const val = toggle.getAttribute('w:val');
    return !(val === '0' || val === 'false' || val === 'off');
  };
  return isToggleOn('w:autoSpaceDE') || isToggleOn('w:autoSpaceDN');
}
export class Paragraph {
// 主要是为了方便调试用的
paraId?: string;
@ -78,7 +89,9 @@ export class Paragraph {
tabs.push(Tab.fromXML(word, tabElement));
}
return {cssStyle, pStyle, numPr, tabs};
const autoSpace = parseAutoSpace(element);
return {cssStyle, pStyle, numPr, tabs, autoSpace};
}
static fromXML(word: Word, element: Element): Paragraph {

View File

@ -76,7 +76,8 @@ function parseTblJc(element: Element, cssStyle: CSSStyle) {
switch (val) {
case 'left':
case 'start':
cssStyle['float'] = 'left';
// TODO: 会导致前面的文字掉下去,感觉还是不能支持这个功能
// cssStyle['float'] = 'left';
break;
case 'right':
case 'end':
@ -165,16 +166,26 @@ function parseTblLook(child: Element) {
* http://webapp.docx4java.org/OnlineDemo/ecma376/WordML/tblpPr.html
*
*/
function parsetTlpPr(child: Element, style: CSSStyle) {
const topFromText = parseSize(child, 'w:topFromText');
const bottomFromText = parseSize(child, 'w:bottomFromText');
const rightFromText = parseSize(child, 'w:rightFromText');
const leftFromText = parseSize(child, 'w:leftFromText');
style['float'] = 'left';
style['margin-bottom'] = addSize(style['margin-bottom'], bottomFromText);
style['margin-left'] = addSize(style['margin-left'], leftFromText);
style['margin-right'] = addSize(style['margin-right'], rightFromText);
style['margin-top'] = addSize(style['margin-top'], topFromText);
/**
 * Applies the position from a `w:tblpPr` element to the table's CSS style by
 * placing the table absolutely at its `w:tblpX` / `w:tblpY` offsets.
 *
 * @param word - current Word instance; its `renderOptions.padding` is consulted
 * @param child - the `w:tblpPr` element
 * @param style - table style object, mutated in place
 */
function parsetTlpPr(word: Word, child: Element, style: CSSStyle) {
  // A configured padding makes absolute positioning inaccurate, so once
  // padding is set this feature is not supported at all.
  const paddingConfigured = typeof word.renderOptions.padding !== 'undefined';
  if (paddingConfigured) {
    return;
  }

  const offsetX = parseSize(child, 'w:tblpX');
  const offsetY = parseSize(child, 'w:tblpY');
  style.position = 'absolute';
  style.top = offsetY;
  style.left = offsetX;

  // An earlier attempt used `float: left` plus margins derived from
  // w:topFromText / w:bottomFromText / w:leftFromText / w:rightFromText,
  // but that pushed the surrounding text down, so it was dropped.
}
export class Table {
@ -257,7 +268,7 @@ export class Table {
break;
case 'w:tblpPr':
parsetTlpPr(child, tableStyle);
parsetTlpPr(word, child, tableStyle);
break;
default:

View File

@ -343,6 +343,29 @@ export function parsePr(word: Word, element: Element, type: 'r' | 'p' = 'p') {
// 目前是自动计算的,所以不需要这个了
break;
case 'w:bidi':
// http://webapp.docx4java.org/OnlineDemo/ecma376/WordML/bidi_1.html
// TODO: 还不清楚和 w:textDirection 是什么关系
if (getValBoolean(child, true)) {
console.warn('w:bidi is not supported.');
}
break;
case 'w:autoSpaceDE':
case 'w:autoSpaceDN':
// 这个在其它地方实现了
break;
case 'w:kinsoku':
// http://webapp.docx4java.org/OnlineDemo/ecma376/WordML/kinsoku.html
// 控制不了所以忽略了
break;
case 'w:overflowPunct':
// http://webapp.docx4java.org/OnlineDemo/ecma376/WordML/overflowPunct.html
// 支持不了
break;
default:
console.warn('parsePr Unknown tagName', tagName, child);
}

View File

@ -3,11 +3,16 @@ import {appendChild, createElement} from '../util/dom';
import Word from '../Word';
import {Run} from '../openxml/word/Run';
import renderRun from './renderRun';
import type {Paragraph} from '../openxml/word/Paragraph';
/**
*
*/
export function renderHyperLink(word: Word, hyperlink: Hyperlink): HTMLElement {
export function renderHyperLink(
word: Word,
hyperlink: Hyperlink,
paragraph?: Paragraph
): HTMLElement {
const a = createElement('a') as HTMLAnchorElement;
if (hyperlink.relation) {
@ -24,7 +29,7 @@ export function renderHyperLink(word: Word, hyperlink: Hyperlink): HTMLElement {
for (const child of hyperlink.children) {
if (child instanceof Run) {
const span = renderRun(word, child);
const span = renderRun(word, child, paragraph);
appendChild(a, span);
}
}

View File

@ -78,6 +78,11 @@ export function renderNumbering(
return null;
}
if (!numbering) {
console.warn('renderNumbering: numbering is empty');
return null;
}
const num = numbering.nums[numId];
if (!num) {

View File

@ -45,11 +45,11 @@ export default function renderParagraph(
for (const child of paragraph.children) {
if (child instanceof Run) {
appendChild(p, renderRun(word, child));
appendChild(p, renderRun(word, child, paragraph));
} else if (child instanceof BookmarkStart) {
appendChild(p, renderBookmarkStart(word, child));
} else if (child instanceof Hyperlink) {
const hyperlink = renderHyperLink(word, child);
const hyperlink = renderHyperLink(word, child, paragraph);
appendChild(p, hyperlink);
} else if (child instanceof SmartTag) {
renderInlineText(word, child, p);

View File

@ -20,16 +20,27 @@ import {InstrText} from '../openxml/word/InstrText';
import {renderInstrText} from './renderInstrText';
import {Sym} from '../openxml/word/Sym';
import {renderSym} from './renderSym';
import {cjkspace} from '../util/autoSpace';
import type {Paragraph} from './../openxml/word/Paragraph';
const VARIABLE_CLASS_NAME = 'variable';
/**
*
*/
function renderText(span: HTMLElement, word: Word, text: string) {
function renderText(
span: HTMLElement,
word: Word,
text: string,
paragraph?: Paragraph
) {
// 简单过滤一下提升性能
if (text.indexOf('{{') === -1) {
if (paragraph?.properties?.autoSpace) {
span.textContent = cjkspace(text.split(''));
} else {
span.textContent = text;
}
} else {
span.dataset.originText = text;
// 加个标识,后续可以通过它来查找哪些变量需要替换,这样就不用重新渲染整个文档了
@ -53,7 +64,7 @@ export function updateVariableText(word: Word) {
/**
* run
*/
export default function renderRun(word: Word, run: Run) {
export default function renderRun(word: Word, run: Run, paragraph?: Paragraph) {
const span = createElement('span');
word.addClass(span, 'r');
@ -62,12 +73,12 @@ export default function renderRun(word: Word, run: Run) {
if (run.children.length === 1 && run.children[0] instanceof Text) {
const text = run.children[0] as Text;
renderText(span, word, text.text);
renderText(span, word, text.text, paragraph);
} else {
for (const child of run.children) {
if (child instanceof Text) {
let newSpan = createElement('span');
renderText(span, word, child.text);
renderText(span, word, child.text, paragraph);
appendChild(span, newSpan);
} else if (child instanceof Break) {
const br = renderBr(child);

View File

@ -8,6 +8,8 @@ import Word from '../Word';
export function renderSection(word: Word, section: Section) {
const sectionEl = createElement('section') as HTMLElement;
// 用于后续绝对定位
sectionEl.style.position = 'relative';
const props = section.properties;
const pageSize = props.pageSize;
if (pageSize) {

View File

@ -0,0 +1,41 @@
/**
* lodash
* https://gist.github.com/wyl8899/e0f31068681023480e20c34f6b19a275
*/
/* Partial implementation from https://zhuanlan.zhihu.com/p/33612593 */
/* Punctuation */
const punctuationRegex = /\p{Punctuation}/u;
/* Whitespace / separators */
const spaceRegex = /\p{Separator}/u;
/* CJK characters (Chinese, Japanese, Korean) */
const cjkRegex =
  /\p{Script=Han}|\p{Script=Katakana}|\p{Script=Hiragana}|\p{Script=Hangul}/u;

/**
 * Decides whether a space belongs between two neighbouring pieces of text.
 *
 * The answer is true only when exactly one of the two contains a CJK
 * character and the other contains neither punctuation nor a separator.
 */
const shouldSpace = (a: string, b: string): boolean => {
  const aHasCjk = cjkRegex.test(a);
  const bHasCjk = cjkRegex.test(b);
  // CJK next to CJK, or non-CJK next to non-CJK: never add a space.
  if (aHasCjk === bHasCjk) {
    return false;
  }
  const nonCjkSide = aHasCjk ? b : a;
  const blocksSpacing =
    punctuationRegex.test(nonCjkSide) || spaceRegex.test(nonCjkSide);
  return !blocksSpacing;
};
/**
 * Concatenates `parts`, inserting between every adjacent pair the separator
 * returned by `sepFunc`.
 *
 * @param parts - pieces to concatenate, in order
 * @param sepFunc - called as `sepFunc(previous, current)` for each adjacent
 *   pair, in reading order; its return value is placed between the two
 * @returns the concatenated string ('' for an empty array)
 */
const join = (
  parts: string[],
  sepFunc: (a: string, b: string) => string
): string => {
  return parts.reduce((result, part, index) => {
    // The first part has no left-hand neighbour, so nothing precedes it.
    const sep = index !== 0 ? sepFunc(parts[index - 1], part) : '';
    return result + sep + part;
  }, '');
};
/**
 * Joins the given pieces of text into one string, putting a single space
 * between neighbouring pieces wherever `shouldSpace` says one belongs.
 * Undefined and empty entries are dropped before joining.
 *
 * @param strings - pieces of text (callers in this project pass single characters)
 * @returns the joined string with spaces inserted
 */
export const cjkspace = (strings: string[]): string => {
  const pieces: string[] = [];
  for (const piece of strings) {
    if (piece !== undefined && piece !== '') {
      pieces.push(piece);
    }
  }
  const separatorFor = (a: string, b: string): string =>
    shouldSpace(a, b) ? ' ' : '';
  return join(pieces, separatorFor);
};