-
Notifications
You must be signed in to change notification settings - Fork 101
Closed
Description
apps/chrome-extension/src/scripts/download-lark-docx-as-markdown.ts
import i18next from 'i18next'
import { Toast, Docx, docx, mdast } from '@dolphin/lark'
import { fileSave, supported } from 'browser-fs-access'
import { fs } from '@zip.js/zip.js'
import normalizeFileName from 'filenamify/browser'
import { cluster } from 'radash'
import { CommonTranslationKey, en, Namespace, zh } from '../common/i18n'
import { confirm } from '../common/notification'
import { legacyFileSave } from '../common/legacy'
import { reportBug } from '../common/issue'
/** Error message marking a download that was deliberately not carried out. */
const DOWNLOAD_ABORTED = 'Download aborted'

// Retry and timeout settings.
/** Maximum number of attempts made for a single download. */
const MAX_RETRIES = 3
/** Per-download timeout in milliseconds (60 seconds). */
const DOWNLOAD_TIMEOUT = 60 * 1000
/** Number of downloads started together in one batch (kept small on purpose). */
const BATCH_SIZE = 5
/** Keys of the translated messages this script registers with i18next. */
const enum TranslationKey {
  CONTENT_LOADING = 'content_loading',
  UNKNOWN_ERROR = 'unknown_error',
  NOT_SUPPORT = 'not_support',
  DOWNLOADING_FILE = 'downloading_file',
  FAILED_TO_DOWNLOAD = 'failed_to_download',
  DOWNLOAD_PROGRESS = 'download_progress',
  DOWNLOAD_COMPLETE = 'download_complete',
  STILL_SAVING = 'still_saving',
  IMAGE = 'image',
  FILE = 'file',
  CANCEL = 'cancel',
  RETRYING = 'retrying', // Added: shown while a download is being retried
  DOCUMENT_TOO_LARGE = 'document_too_large', // Added: notice for very large documents
  FAILED_IMAGES = 'failed_images', // Added: summary of failed image downloads
}
/** Keys identifying toasts so that they can be removed again later. */
enum ToastKey {
  DOWNLOADING = 'downloading',
}
// Register this script's English and Chinese messages with i18next.
// The active language is taken from `docx.language`; the shared `en` / `zh`
// resources imported from '../common/i18n' are spread next to the
// `translation` namespace of each language.
i18next.init({
  lng: docx.language,
  resources: {
    en: {
      translation: {
        [TranslationKey.CONTENT_LOADING]:
          'Part of the content is still loading and cannot be downloaded at the moment. Please wait for loading to complete and retry',
        [TranslationKey.UNKNOWN_ERROR]: 'Unknown error during download',
        [TranslationKey.NOT_SUPPORT]:
          'This is not a lark document page and cannot be downloaded as Markdown',
        [TranslationKey.DOWNLOADING_FILE]:
          'Download {{name}} in: {{progress}}% (please do not refresh or close the page)',
        [TranslationKey.FAILED_TO_DOWNLOAD]: 'Failed to download {{name}}',
        [TranslationKey.STILL_SAVING]:
          'Still saving (please do not refresh or close the page)',
        [TranslationKey.DOWNLOAD_PROGRESS]:
          '{{name}} download progress: {{progress}} %',
        [TranslationKey.DOWNLOAD_COMPLETE]: 'Download complete',
        [TranslationKey.IMAGE]: 'Image',
        [TranslationKey.FILE]: 'File',
        [TranslationKey.CANCEL]: 'Cancel',
        // Placeholders: attempt number, maximum attempts, item name.
        [TranslationKey.RETRYING]: 'Retrying ({{attempt}}/{{max}}): {{name}}',
        [TranslationKey.DOCUMENT_TOO_LARGE]: 'Document is very large. Processing may take longer than usual.',
        [TranslationKey.FAILED_IMAGES]: '{{count}} images failed to download. The document may be incomplete.',
      },
      ...en,
    },
    zh: {
      translation: {
        [TranslationKey.CONTENT_LOADING]:
          '部分内容仍在加载中,暂时无法下载。请等待加载完成后重试',
        [TranslationKey.UNKNOWN_ERROR]: '下载过程中出现未知错误',
        [TranslationKey.NOT_SUPPORT]:
          '这不是一个飞书文档页面,无法下载为 Markdown',
        [TranslationKey.DOWNLOADING_FILE]:
          '下载 {{name}} 中:{{progress}}%(请不要刷新或关闭页面)',
        [TranslationKey.FAILED_TO_DOWNLOAD]: '下载 {{name}} 失败',
        [TranslationKey.STILL_SAVING]: '仍在保存中(请不要刷新或关闭页面)',
        [TranslationKey.DOWNLOAD_PROGRESS]: '{{name}}下载进度:{{progress}}%',
        [TranslationKey.DOWNLOAD_COMPLETE]: '下载完成',
        [TranslationKey.IMAGE]: '图片',
        [TranslationKey.FILE]: '文件',
        [TranslationKey.CANCEL]: '取消',
        // Placeholders: attempt number, maximum attempts, item name.
        [TranslationKey.RETRYING]: '正在重试 ({{attempt}}/{{max}}): {{name}}',
        [TranslationKey.DOCUMENT_TOO_LARGE]: '文档较大,处理可能需要较长时间。',
        [TranslationKey.FAILED_IMAGES]: '{{count}}张图片下载失败,文档可能不完整。',
      },
      ...zh,
    },
  },
})
/** Every file name handed out so far, both original and generated ones. */
const usedNames: Set<string> = new Set()
/** Last numeric suffix tried for each original file name. */
const fileNameToPreId: Map<string, number> = new Map()

/**
 * Returns a file name that has not been handed out before.
 *
 * The first request for a name returns it unchanged. Later requests insert
 * `-<n>` before the last dot (or append it when there is no dot), skipping any
 * candidate that is already taken, so a generated name can never collide with
 * a name that was (or later is) requested verbatim.
 *
 * @param originFileName - The desired file name.
 * @returns A name unique among all names returned by this function.
 */
const uniqueFileName = (originFileName: string) => {
  if (!usedNames.has(originFileName)) {
    usedNames.add(originFileName)
    return originFileName
  }

  const startDotIndex = originFileName.lastIndexOf('.')
  const stem =
    startDotIndex === -1
      ? originFileName
      : originFileName.slice(0, startDotIndex)
  const extension =
    startDotIndex === -1 ? '' : originFileName.slice(startDotIndex)

  let id = fileNameToPreId.get(originFileName) ?? 0
  let fileName: string
  do {
    id += 1
    fileName = `${stem}-${id}${extension}`
    // Keep counting while the candidate is taken, e.g. by an original "a-1.png".
  } while (usedNames.has(fileName))

  fileNameToPreId.set(originFileName, id)
  // Record the generated name too, so that it cannot be handed out again.
  usedNames.add(fileName)
  return fileName
}
/** Optional callbacks for observing a download. */
interface ProgressOptions {
  /** Called with the current progress as a fraction (callers multiply by 100). */
  onProgress?: (progress: number) => void
  /** Called once when the work has finished. */
  onComplete?: () => void
}
/**
 * Reads a response body into a Blob, reporting progress and enforcing a
 * timeout.
 *
 * @param response - The response whose body is streamed.
 * @param options - Progress callbacks plus an optional `timeout` in
 *   milliseconds (defaults to `DOWNLOAD_TIMEOUT`). `onProgress` receives a
 *   fraction in the range 0..1; it receives 0 while the total size is unknown
 *   (no usable `Content-Length` header).
 * @returns A Blob built from all received chunks.
 * @throws Error when the response is not ok, has no body, the stream fails,
 *   or the timeout elapses (`'Download timeout'`).
 */
async function toBlob(
  response: Response,
  options: ProgressOptions & { timeout?: number } = {},
): Promise<Blob> {
  if (!response.ok) {
    throw new Error(`HTTP error! status: ${response.status}`)
  }
  if (!response.body) {
    throw new Error('This request has no response body.')
  }

  const { onProgress, onComplete, timeout = DOWNLOAD_TIMEOUT } = options
  const reader = response.body.getReader()
  const contentLength = parseInt(
    response.headers.get('Content-Length') ?? '0',
    10,
  )
  // Without a positive Content-Length a ratio would be Infinity or NaN.
  const hasKnownLength = Number.isFinite(contentLength) && contentLength > 0

  let timedOut = false
  let timer: ReturnType<typeof setTimeout> | undefined
  const timeoutPromise = new Promise<never>((_, reject) => {
    timer = setTimeout(() => {
      timedOut = true
      reject(new Error('Download timeout'))
    }, timeout)
  })

  const download = async (): Promise<Blob> => {
    const chunks: BlobPart[] = []
    let receivedLength = 0
    for (;;) {
      const { done, value } = await reader.read()
      // After a timeout the reader is cancelled and read() reports `done`;
      // do not treat that as a successful completion.
      if (timedOut) throw new Error('Download timeout')
      if (done) break
      chunks.push(value)
      receivedLength += value.length
      // Clamp: a compressed body can deliver more bytes than Content-Length.
      onProgress?.(
        hasKnownLength ? Math.min(receivedLength / contentLength, 1) : 0,
      )
    }
    onComplete?.()
    return new Blob(chunks)
  }

  try {
    return await Promise.race([download(), timeoutPromise])
  } catch (error) {
    // Stop pulling data nobody will consume; a failure to cancel is irrelevant.
    reader.cancel().catch(() => undefined)
    throw error
  } finally {
    // Always release the timer so it does not linger after the download.
    clearTimeout(timer)
  }
}
/**
 * Runs `fn`, retrying it when it rejects.
 *
 * Between attempts a "retrying" toast is shown and the function waits
 * `1000 ms * attempt` (linear backoff). An error named `AbortError` (a
 * cancellation) is rethrown immediately and never retried.
 *
 * @param fn - The asynchronous operation to run.
 * @param options - `maxRetries` is the total number of attempts (defaults to
 *   `MAX_RETRIES`); `name` is shown in the retry toast.
 * @returns The value `fn` resolved with.
 * @throws The last error raised by `fn`, wrapped in an `Error` when it was
 *   not one already.
 */
const retry = async <T>(
  fn: () => Promise<T>,
  { maxRetries = MAX_RETRIES, name = '' } = {},
): Promise<T> => {
  let lastError: Error | null = null

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      return await fn()
    } catch (error) {
      lastError = error instanceof Error ? error : new Error(String(error))

      // A cancellation is intentional; retrying would undo the user's choice.
      if (lastError.name === 'AbortError') break
      // The final attempt failed: nothing left to retry.
      if (attempt === maxRetries) break

      const toastKey = `retry-${name}-${attempt}`
      Toast.loading({
        content: i18next.t(TranslationKey.RETRYING, {
          attempt,
          max: maxRetries,
          name,
        }),
        keepAlive: true,
        key: toastKey,
      })
      try {
        // Linear backoff: 1 s after the first failure, 2 s after the second, ...
        await new Promise(resolve => setTimeout(resolve, 1000 * attempt))
      } finally {
        Toast.remove(toastKey)
      }
    }
  }

  if (lastError) throw lastError
  throw new Error('Unknown error during retry')
}
// Number of image downloads that failed; reset by `main` and reported at the end.
let failedImages = 0

/**
 * Downloads one image (or whiteboard diagram) referenced by the Markdown AST
 * and points the node's `url` at its path inside the archive.
 *
 * The archive file name is reserved once and reused across retries, so a
 * download that only succeeds on a later attempt does not receive a
 * needlessly suffixed name.
 *
 * @param image - The image node; `image.data` supplies `name`,
 *   `fetchSources` and/or `fetchBlob`.
 * @returns The archive path and content, or `null` when there is nothing to
 *   download or the download failed (a failure is counted in `failedImages`
 *   and reported with an error toast).
 */
const downloadImage = async (
  image: mdast.Image,
): Promise<DownloadResult | null> => {
  if (!image.data) return null

  const { name: originName, fetchSources, fetchBlob } = image.data

  // Reserved lazily (only once content or sources exist) and then kept for
  // every later retry attempt.
  let reservedName: string | undefined
  const reserveName = (candidate: string): string => {
    reservedName = reservedName ?? uniqueFileName(candidate)
    return reservedName
  }

  try {
    // whiteboard
    if (fetchBlob) {
      return await retry(
        async () => {
          const content = await fetchBlob()
          if (!content) return null

          const name = reserveName('diagram.png')
          const filename = `images/${name}`
          image.url = filename

          return {
            filename,
            content,
          }
        },
        { name: originName || 'diagram' },
      )
    }

    // image
    if (originName && fetchSources) {
      return await retry(
        async () => {
          const sources = await fetchSources()
          if (!sources) return null

          const name = reserveName(originName)
          const filename = `images/${name}`
          const { src } = sources
          const response = await fetch(src)

          try {
            const blob = await toBlob(response, {
              onProgress: progress => {
                Toast.loading({
                  content: i18next.t(TranslationKey.DOWNLOADING_FILE, {
                    name,
                    progress: Math.floor(progress * 100),
                  }),
                  keepAlive: true,
                  key: filename,
                })
              },
            })
            image.url = filename

            return {
              filename,
              content: blob,
            }
          } finally {
            // Remove the per-file progress toast whether or not it succeeded.
            Toast.remove(filename)
          }
        },
        { name: originName },
      )
    }

    return null
  } catch (error) {
    failedImages++
    Toast.error({
      content: i18next.t(TranslationKey.FAILED_TO_DOWNLOAD, {
        name: originName,
      }),
      actionText: i18next.t(CommonTranslationKey.CONFIRM_REPORT_BUG, {
        ns: Namespace.COMMON,
      }),
      onActionClick: () => {
        reportBug(error)
      },
    })
    return null
  }
}
/**
 * Downloads one attached file referenced by the Markdown AST and points the
 * node's `url` at its path inside the archive.
 *
 * The archive file name and the abort controller are created once and shared
 * by all retry attempts: retries do not burn additional file names, and a
 * cancellation requested by the user stays in effect for later attempts.
 *
 * @param file - The link node; `file.data` must supply `name` and `fetchFile`.
 * @returns The archive path and content, or `null` when there is nothing to
 *   download, the user cancelled, or the download failed (a failure is
 *   reported with an error toast).
 */
const downloadFile = async (
  file: mdast.Link,
): Promise<DownloadResult | null> => {
  if (!file.data || !file.data.name || !file.data.fetchFile) return null

  const { name, fetchFile } = file.data

  const controller = new AbortController()
  const cancel = () => {
    controller.abort()
  }

  try {
    const filename = `files/${uniqueFileName(name)}`

    return await retry(
      async () => {
        // Once cancelled, never start another attempt.
        if (controller.signal.aborted) {
          throw new DOMException('Download cancelled', 'AbortError')
        }

        const response = await fetchFile({ signal: controller.signal })

        try {
          const blob = await toBlob(response, {
            onProgress: progress => {
              Toast.loading({
                content: i18next.t(TranslationKey.DOWNLOADING_FILE, {
                  name,
                  progress: Math.floor(progress * 100),
                }),
                keepAlive: true,
                key: filename,
                actionText: i18next.t(TranslationKey.CANCEL),
                onActionClick: cancel,
              })
            },
          })
          file.url = filename

          return {
            filename,
            content: blob,
          }
        } finally {
          // Remove the per-file progress toast whether or not it succeeded.
          Toast.remove(filename)
        }
      },
      { name },
    )
  } catch (error) {
    // A cancellation is not a failure: skip the file silently.
    if (error instanceof Error && error.name === 'AbortError') {
      return null
    }
    Toast.error({
      content: i18next.t(TranslationKey.FAILED_TO_DOWNLOAD, {
        name,
      }),
      actionText: i18next.t(CommonTranslationKey.CONFIRM_REPORT_BUG, {
        ns: Namespace.COMMON,
      }),
      onActionClick: () => {
        reportBug(error)
      },
    })
    return null
  }
}
/** A downloaded asset that is ready to be added to the zip archive. */
interface DownloadResult {
  /** Path of the asset inside the archive, e.g. `images/...` or `files/...`. */
  filename: string
  /** The downloaded content. */
  content: Blob
}
/**
 * A Markdown AST node that refers to a downloadable asset.
 * Note: within this module the alias shadows the global DOM `File` type.
 */
type File = mdast.Image | mdast.Link
/**
 * Downloads the given image and file nodes in batches.
 *
 * Batches run one after another; the items of a batch run concurrently.
 * An item that yields no result is simply left out of the returned list.
 *
 * @param files - Image and link nodes to download.
 * @param options - Progress callbacks and the batch size. `onProgress`
 *   receives the fraction of items processed so far; `onComplete` is called
 *   after the last batch.
 * @returns The results of all downloads that produced content.
 */
const downloadFiles = async (
  files: File[],
  options: ProgressOptions & {
    /**
     * @default 5
     */
    batchSize?: number
  } = {},
): Promise<DownloadResult[]> => {
  const { onProgress, onComplete, batchSize = BATCH_SIZE } = options

  // Chunking needs a positive whole number; callers may pass a fraction
  // (e.g. half of an odd batch size), zero or NaN.
  const effectiveBatchSize = Number.isFinite(batchSize)
    ? Math.max(1, Math.floor(batchSize))
    : BATCH_SIZE

  const results: DownloadResult[] = []
  const totalSize = files.length
  let downloadedSize = 0

  for (const batch of cluster(files, effectiveBatchSize)) {
    await Promise.allSettled(
      batch.map(async file => {
        try {
          const result =
            file.type === 'image'
              ? await downloadImage(file)
              : await downloadFile(file)

          if (result) {
            results.push(result)
          }
        } finally {
          // Count the item as processed whether or not it produced a result.
          downloadedSize++
          onProgress?.(downloadedSize / totalSize)
        }
      }),
    )
  }

  onComplete?.()

  return results
}
/**
 * Warns the user when the document looks very large.
 *
 * The size is estimated as the length of the JSON serialisation of
 * `docx.rootBlock`, compared against 5 * 1024 * 1024 characters. The check is
 * purely advisory: if the block tree cannot be serialised, the document is
 * treated as not large instead of aborting the export.
 *
 * @returns `true` when the warning was shown, otherwise `false`.
 */
const checkDocumentSize = () => {
  let contentSize = 0
  try {
    // JSON.stringify throws on circular structures and may return undefined.
    const serialized: string | undefined = docx.rootBlock
      ? JSON.stringify(docx.rootBlock)
      : undefined
    contentSize = serialized?.length ?? 0
  } catch {
    return false
  }

  // Warning threshold for large documents (roughly 5 MB of serialised data).
  if (contentSize > 5 * 1024 * 1024) {
    Toast.warning({
      content: i18next.t(TranslationKey.DOCUMENT_TOO_LARGE),
      duration: 5000,
    })
    return true
  }

  return false
}
/**
 * Polls the document until it reports being ready and no longer loading.
 *
 * The state is checked immediately and then every 500 ms.
 *
 * @param timeout - Maximum time to keep polling, in milliseconds.
 * @returns `true` once the document is ready, `false` when more than
 *   `timeout` milliseconds have passed without that happening.
 */
const waitForFullLoad = async (timeout = 10000): Promise<boolean> => {
  const pollIntervalMs = 500
  const startedAt = Date.now()

  for (;;) {
    const loaded = docx.isReady() && !docx.isLoading()
    if (loaded) return true

    const elapsed = Date.now() - startedAt
    if (elapsed > timeout) return false

    await new Promise<void>(wake => {
      setTimeout(wake, pollIntervalMs)
    })
  }
}
/**
 * Exports the current document as Markdown.
 *
 * The result is a single `.md` file, or a `.zip` containing the Markdown plus
 * an `images/` and `files/` tree when the document references images or
 * attachments.
 *
 * @throws Error with the message `DOWNLOAD_ABORTED` when the page is not a
 *   supported document, the content did not finish loading in time, or the
 *   user declined the confirmation. The rejection keeps the caller from
 *   announcing a completed download; the explanatory warning toast has
 *   already been shown by then.
 */
const main = async () => {
  if (!docx.rootBlock) {
    Toast.warning({ content: i18next.t(TranslationKey.NOT_SUPPORT) })
    throw new Error(DOWNLOAD_ABORTED)
  }

  // Wait until the document has finished loading.
  const isFullyLoaded = await waitForFullLoad()
  if (!isFullyLoaded) {
    Toast.warning({
      content: i18next.t(TranslationKey.CONTENT_LOADING),
    })
    throw new Error(DOWNLOAD_ABORTED)
  }

  // Reset the failed-image counter for this run.
  failedImages = 0

  // Warn about very large documents (advisory only).
  checkDocumentSize()

  const { root, images, files } = docx.intoMarkdownAST({
    whiteboard: true,
    file: true,
  })

  const recommendName = docx.pageTitle
    ? normalizeFileName(docx.pageTitle.slice(0, 100))
    : 'doc'
  const isZip = images.length > 0 || files.length > 0
  const ext = isZip ? '.zip' : '.md'
  const filename = `${recommendName}${ext}`

  // Attachments are downloaded with about half the concurrency of images;
  // the batch size must stay a whole number of at least 1.
  const fileBatchSize = Math.max(1, Math.floor(BATCH_SIZE / 2))

  /** Builds the content to save: plain Markdown or a zip archive. */
  const createContentBlob = async () => {
    Toast.loading({
      content: i18next.t(TranslationKey.STILL_SAVING),
      keepAlive: true,
      key: ToastKey.DOWNLOADING,
    })

    const singleFileContent = () => {
      const markdown = Docx.stringify(root)
      return new Blob([markdown])
    }

    const zipFileContent = async () => {
      const zipFs = new fs.FS()

      const results = await Promise.all([
        downloadFiles(images, {
          batchSize: BATCH_SIZE,
          onProgress: progress => {
            Toast.loading({
              content: i18next.t(TranslationKey.DOWNLOAD_PROGRESS, {
                name: i18next.t(TranslationKey.IMAGE),
                progress: Math.floor(progress * 100),
              }),
              keepAlive: true,
              key: TranslationKey.IMAGE,
            })
          },
          onComplete: () => {
            Toast.remove(TranslationKey.IMAGE)
          },
        }),
        downloadFiles(files, {
          batchSize: fileBatchSize,
          onProgress: progress => {
            Toast.loading({
              content: i18next.t(TranslationKey.DOWNLOAD_PROGRESS, {
                name: i18next.t(TranslationKey.FILE),
                progress: Math.floor(progress * 100),
              }),
              keepAlive: true,
              key: TranslationKey.FILE,
            })
          },
          onComplete: () => {
            Toast.remove(TranslationKey.FILE)
          },
        }),
      ])

      results.flat(1).forEach(({ filename, content }) => {
        zipFs.addBlob(filename, content)
      })

      // Stringify after the downloads: they rewrite the nodes' `url` fields
      // to the paths used inside the archive.
      const markdown = Docx.stringify(root)
      zipFs.addText(`${recommendName}.md`, markdown)

      return await zipFs.exportBlob()
    }

    const content = isZip ? await zipFileContent() : singleFileContent()
    return content
  }

  if (supported) {
    // NOTE(review): presumably the save picker needs an active user gesture,
    // hence the confirmation when none is active — confirm against
    // `confirm()` in '../common/notification'.
    if (!navigator.userActivation.isActive) {
      const confirmed = await confirm()
      if (!confirmed) {
        throw new Error(DOWNLOAD_ABORTED)
      }
    }

    // The pending promise is passed on purpose; it is not awaited here.
    await fileSave(createContentBlob(), {
      fileName: filename,
      extensions: [ext],
    })
  } else {
    const blob = await createContentBlob()
    legacyFileSave(blob, {
      fileName: filename,
    })
  }
}
// Entry point: run the export and report the outcome with toasts.
main()
  .then(() => {
    // Announce completion; when some images failed, show an extra warning first.
    if (failedImages > 0) {
      Toast.warning({
        content: i18next.t(TranslationKey.FAILED_IMAGES, {
          count: failedImages
        }),
        duration: 5000,
      })
    }
    Toast.success({
      content: i18next.t(TranslationKey.DOWNLOAD_COMPLETE),
    })
  })
  .catch((error: DOMException | TypeError | Error) => {
    // Errors named 'AbortError' and the DOWNLOAD_ABORTED marker are not
    // reported; everything else is shown with a "report bug" action.
    if (error.name !== 'AbortError' && error.message !== DOWNLOAD_ABORTED) {
      Toast.error({
        content: String(error),
        actionText: i18next.t(CommonTranslationKey.CONFIRM_REPORT_BUG, {
          ns: Namespace.COMMON,
        }),
        onActionClick: () => {
          reportBug(error)
        },
      })
    }
  })
  .finally(() => {
    // Always remove the "still saving" toast shown while the content is built.
    Toast.remove(ToastKey.DOWNLOADING)
  })
Metadata
Metadata
Assignees
Labels
No labels