From 95901eb8a829865c883a4374c53a0b8909b53c41 Mon Sep 17 00:00:00 2001 From: Koy Zhuang Date: Sat, 21 Sep 2024 19:45:05 +0800 Subject: [PATCH] fix(search): clean markdown elements in search contents (#2457) Co-authored-by: John Hildenbiddle --- src/plugins/search/component.js | 3 +- src/plugins/search/markdown-to-txt.js | 197 ++++++++++++++++++++++++++ src/plugins/search/search.js | 11 +- test/e2e/search.test.js | 101 +++++++++++++ 4 files changed, 306 insertions(+), 6 deletions(-) create mode 100644 src/plugins/search/markdown-to-txt.js diff --git a/src/plugins/search/component.js b/src/plugins/search/component.js index dc0ac2e..88fc3de 100644 --- a/src/plugins/search/component.js +++ b/src/plugins/search/component.js @@ -49,12 +49,13 @@ function doSearch(value) { let html = ''; matches.forEach((post, i) => { + const content = post.content ? `...${post.content}...` : ''; const title = (post.title || '').replace(/<[^>]+>/g, ''); html += /* html */ `

${post.title}

-

${post.content}

+

${content}

`; diff --git a/src/plugins/search/markdown-to-txt.js b/src/plugins/search/markdown-to-txt.js new file mode 100644 index 0000000..90e042b --- /dev/null +++ b/src/plugins/search/markdown-to-txt.js @@ -0,0 +1,197 @@ +/** + * This is a function to convert markdown to txt based on markedjs v13+. + * Copies the escape/unescape functions from [lodash](https://www.npmjs.com/package/lodash) instead import to reduce the size. + */ +import { marked } from 'marked'; + +const reEscapedHtml = /&(?:amp|lt|gt|quot|#(0+)?39);/g; +const reHasEscapedHtml = RegExp(reEscapedHtml.source); +const htmlUnescapes = { + '&': '&', + '<': '<', + '>': '>', + '"': '"', + ''': "'", +}; + +function unescape(string) { + return string && reHasEscapedHtml.test(string) + ? string.replace(reEscapedHtml, entity => htmlUnescapes[entity] || "'") + : string || ''; +} + +const reUnescapedHtml = /[&<>"']/g; +const reHasUnescapedHtml = RegExp(reUnescapedHtml.source); +const htmlEscapes = { + '&': '&', + '<': '<', + '>': '>', + '"': '"', + "'": ''', +}; + +function escape(string) { + return string && reHasUnescapedHtml.test(string) + ? string.replace(reUnescapedHtml, chr => htmlEscapes[chr]) + : string || ''; +} + +function helpersCleanup(string) { + return string && string.replace('!>', '').replace('?>', ''); +} + +const markdownToTxtRenderer = { + space() { + return ''; + }, + + code({ text }) { + const code = text.replace(/\n$/, ''); + return escape(code); + }, + + blockquote({ tokens }) { + return this.parser?.parse(tokens) || ''; + }, + + html() { + return ''; + }, + + heading({ tokens }) { + return this.parser?.parse(tokens) || ''; + }, + + hr() { + return ''; + }, + + list(token) { + let body = ''; + for (let j = 0; j < token.items.length; j++) { + const item = token.items[j]; + body += this.listitem?.(item); + } + + return body; + }, + + listitem(item) { + let itemBody = ''; + if (item.task) { + const checkbox = this.checkbox?.({ checked: !!item.checked }); + if (item.loose) { + if (item.tokens.length > 0 && item.tokens[0].type === 'paragraph') { + item.tokens[0].text = checkbox + ' ' + item.tokens[0].text; + if ( + item.tokens[0].tokens && + item.tokens[0].tokens.length > 0 && + item.tokens[0].tokens[0].type === 'text' + ) { + item.tokens[0].tokens[0].text = + checkbox + ' ' + item.tokens[0].tokens[0].text; + } + } else { + item.tokens.unshift({ + type: 'text', + raw: checkbox + ' ', + text: checkbox + ' ', + }); + } + } else { + itemBody += checkbox + ' '; + } + } + + itemBody += this.parser?.parse(item.tokens, !!item.loose); + + return `${itemBody || ''}`; + }, + + checkbox() { + return ''; + }, + + paragraph({ tokens }) { + return this.parser?.parseInline(tokens) || ''; + }, + + table(token) { + let header = ''; + + let cell = ''; + for (let j = 0; j < token.header.length; j++) { + cell += this.tablecell?.(token.header[j]); + } + header += this.tablerow?.({ text: cell }); + + let body = ''; + for (let j = 0; j < token.rows.length; j++) { + const row = token.rows[j]; + + cell = ''; + for (let k = 0; k < row.length; k++) { + cell += this.tablecell?.(row[k]); + } + + body += this.tablerow?.({ text: cell }); + } + + return header + ' ' + body; + }, + + tablerow({ text }) { + return text; + }, + + tablecell(token) { + return this.parser?.parseInline(token.tokens) || ''; + }, + + strong({ text }) { + return text; + }, + + em({ tokens }) { + return this.parser?.parseInline(tokens) || ''; + }, + + codespan({ text }) { + return text; + }, + + br() { + return ' '; + }, + + del({ tokens }) { + return this.parser?.parseInline(tokens); + }, + + link({ tokens, href, title }) { + // Remain the href and title attributes for searching, so is the image + // e.g. [filename](_media/example.js ':include :type=code :fragment=demo') + // Result: filename _media/example.js :include :type=code :fragment=demo + return `${this.parser?.parseInline(tokens) || ''} ${href || ''} ${title || ''}`; + }, + + image({ title, text, href }) { + return `${text || ''} ${href || ''} ${title || ''}`; + }, + + text(token) { + return token.tokens + ? this.parser?.parseInline(token.tokens) || '' + : token.text || ''; + }, +}; +const _marked = marked.setOptions({ renderer: markdownToTxtRenderer }); + +export function markdownToTxt(markdown) { + const unmarked = _marked.parse(markdown); + const unescaped = unescape(unmarked); + const helpersCleaned = helpersCleanup(unescaped); + return helpersCleaned.trim(); +} + +export default markdownToTxt; diff --git a/src/plugins/search/search.js b/src/plugins/search/search.js index 31a6948..d4e33e3 100644 --- a/src/plugins/search/search.js +++ b/src/plugins/search/search.js @@ -2,6 +2,7 @@ import { getAndRemoveConfig, getAndRemoveDocsifyIgnoreConfig, } from '../../core/render/utils.js'; +import { markdownToTxt } from './markdown-to-txt.js'; import Dexie from 'dexie'; let INDEXES = {}; @@ -134,7 +135,7 @@ export function genIndex(path, content = '', router, depth, indexKey) { index[slug] = { slug, title: path !== '/' ? path.slice(1) : 'Home Page', - body: token.text || '', + body: markdownToTxt(token.text || ''), path: path, indexKey: indexKey, }; @@ -150,12 +151,12 @@ export function genIndex(path, content = '', router, depth, indexKey) { token.text = getTableData(token); token.text = getListData(token); - index[slug].body += '\n' + (token.text || ''); + index[slug].body += '\n' + markdownToTxt(token.text || ''); } else { token.text = getTableData(token); token.text = getListData(token); - index[slug].body = token.text || ''; + index[slug].body = markdownToTxt(token.text || ''); } index[slug].path = path; @@ -229,8 +230,8 @@ export function search(query) { start = indexContent < 11 ? 0 : indexContent - 10; end = start === 0 ? 100 : indexContent + keyword.length + 90; - if (postContent && end > postContent.length) { - end = postContent.length; + if (handlePostContent && end > handlePostContent.length) { + end = handlePostContent.length; } const matchContent = diff --git a/test/e2e/search.test.js b/test/e2e/search.test.js index 6d0d2c1..0d0056a 100644 --- a/test/e2e/search.test.js +++ b/test/e2e/search.test.js @@ -232,4 +232,105 @@ test.describe('Search Plugin Tests', () => { await page.keyboard.press('z'); await expect(searchFieldElm).toBeFocused(); }); + test('search result should remove markdown code block', async ({ page }) => { + const docsifyInitConfig = { + markdown: { + homepage: ` +# Hello World + +searchHere +\`\`\`js +console.log('Hello World'); +\`\`\` + `, + }, + scriptURLs: ['/dist/plugins/search.js'], + }; + + const searchFieldElm = page.locator('input[type=search]'); + const resultsHeadingElm = page.locator('.results-panel .content'); + + await docsifyInit(docsifyInitConfig); + await searchFieldElm.fill('searchHere'); + // there is a newline after searchHere and the markdown part ```js ``` it should be removed + expect(await resultsHeadingElm.textContent()).toContain( + "...searchHere\nconsole.log('Hello World');...", + ); + }); + + test('search result should remove file markdown and keep href attribution for files', async ({ + page, + }) => { + const docsifyInitConfig = { + markdown: { + homepage: ` +# Hello World +![filename](_media/example.js ':include :type=code :fragment=demo') + `, + }, + scriptURLs: ['/dist/plugins/search.js'], + }; + + const searchFieldElm = page.locator('input[type=search]'); + const resultsHeadingElm = page.locator('.results-panel .content'); + + await docsifyInit(docsifyInitConfig); + await searchFieldElm.fill('filename'); + expect(await resultsHeadingElm.textContent()).toContain( + '...filename _media/example.js :include :type=code :fragment=demo...', + ); + }); + + test('search result should remove checkbox markdown and keep related values', async ({ + page, + }) => { + const docsifyInitConfig = { + markdown: { + homepage: ` +# Hello World + +- [ ] Task 1 +- [x] SearchHere +- [ ] Task 3 + `, + }, + scriptURLs: ['/dist/plugins/search.js'], + }; + + const searchFieldElm = page.locator('input[type=search]'); + const resultsHeadingElm = page.locator('.results-panel .content'); + + await docsifyInit(docsifyInitConfig); + await searchFieldElm.fill('SearchHere'); + // remove the checkbox markdown and keep the related values + expect(await resultsHeadingElm.textContent()).toContain( + '...Task 1 SearchHere Task 3...', + ); + }); + + test('search result should remove docsify self helper markdown and keep related values', async ({ + page, + }) => { + const docsifyInitConfig = { + markdown: { + homepage: ` +# Hello World + +!> SearchHere to check it! + + `, + }, + scriptURLs: ['/dist/plugins/search.js'], + }; + + const searchFieldElm = page.locator('input[type=search]'); + const resultsHeadingElm = page.locator('.results-panel .content'); + + await docsifyInit(docsifyInitConfig); + await searchFieldElm.fill('SearchHere'); + // remove the helper markdown and keep the related values + expect(await resultsHeadingElm.textContent()).toContain( + '...SearchHere to check it!...', + ); + }); });