Skip to content

使用 Claude 修改了代码,不确定能否修复之前长文档难以下载的问题 #40

@yjdeng1

Description

@yjdeng1

apps/chrome-extension/src/scripts/download-lark-docx-as-markdown.ts

import i18next from 'i18next'
import { Toast, Docx, docx, mdast } from '@dolphin/lark'
import { fileSave, supported } from 'browser-fs-access'
import { fs } from '@zip.js/zip.js'
import normalizeFileName from 'filenamify/browser'
import { cluster } from 'radash'
import { CommonTranslationKey, en, Namespace, zh } from '../common/i18n'
import { confirm } from '../common/notification'
import { legacyFileSave } from '../common/legacy'
import { reportBug } from '../common/issue'

/** Error message used to signal that the download was aborted. */
const DOWNLOAD_ABORTED = 'Download aborted'

/** Default number of attempts made by the retry helper. */
const MAX_RETRIES = 3

/** Default download timeout in milliseconds (60 seconds). */
const DOWNLOAD_TIMEOUT = 60 * 1000

/** Default number of downloads started per batch (kept small to limit concurrency). */
const BATCH_SIZE = 5

/**
 * Keys of the translation strings registered with i18next in this file.
 * `IMAGE` and `FILE` are additionally reused as toast keys for the
 * aggregate progress toasts.
 */
const enum TranslationKey {
  CONTENT_LOADING = 'content_loading',
  UNKNOWN_ERROR = 'unknown_error',
  NOT_SUPPORT = 'not_support',
  DOWNLOADING_FILE = 'downloading_file',
  FAILED_TO_DOWNLOAD = 'failed_to_download',
  DOWNLOAD_PROGRESS = 'download_progress',
  DOWNLOAD_COMPLETE = 'download_complete',
  STILL_SAVING = 'still_saving',
  IMAGE = 'image',
  FILE = 'file',
  CANCEL = 'cancel',
  RETRYING = 'retrying', // Added: notice shown while a download is being retried
  DOCUMENT_TOO_LARGE = 'document_too_large', // Added: warning for very large documents
  FAILED_IMAGES = 'failed_images', // Added: summary of how many images failed
}

/** Keys identifying toasts so they can be updated or removed later. */
enum ToastKey {
  // Key of the "still saving" toast shown while the output blob is built;
  // removed when the top-level promise chain settles.
  DOWNLOADING = 'downloading',
}

// Register the English and Chinese strings used by this script. The active
// language is taken from `docx.language`.
i18next.init({
  lng: docx.language,
  resources: {
    en: {
      translation: {
        [TranslationKey.CONTENT_LOADING]:
          'Part of the content is still loading and cannot be downloaded at the moment. Please wait for loading to complete and retry',
        [TranslationKey.UNKNOWN_ERROR]: 'Unknown error during download',
        [TranslationKey.NOT_SUPPORT]:
          'This is not a lark document page and cannot be downloaded as Markdown',
        [TranslationKey.DOWNLOADING_FILE]:
          'Download {{name}} in: {{progress}}% (please do not refresh or close the page)',
        [TranslationKey.FAILED_TO_DOWNLOAD]: 'Failed to download {{name}}',
        [TranslationKey.STILL_SAVING]:
          'Still saving (please do not refresh or close the page)',
        [TranslationKey.DOWNLOAD_PROGRESS]:
          '{{name}} download progress: {{progress}} %',
        [TranslationKey.DOWNLOAD_COMPLETE]: 'Download complete',
        [TranslationKey.IMAGE]: 'Image',
        [TranslationKey.FILE]: 'File',
        [TranslationKey.CANCEL]: 'Cancel',
        [TranslationKey.RETRYING]: 'Retrying ({{attempt}}/{{max}}): {{name}}',
        [TranslationKey.DOCUMENT_TOO_LARGE]: 'Document is very large. Processing may take longer than usual.',
        [TranslationKey.FAILED_IMAGES]: '{{count}} images failed to download. The document may be incomplete.',
      },
      // Merge the entries of the shared `en` resources next to `translation`
      // (presumably the namespace addressed via `Namespace.COMMON` — confirm
      // against ../common/i18n).
      ...en,
    },
    zh: {
      translation: {
        [TranslationKey.CONTENT_LOADING]:
          '部分内容仍在加载中,暂时无法下载。请等待加载完成后重试',
        [TranslationKey.UNKNOWN_ERROR]: '下载过程中出现未知错误',
        [TranslationKey.NOT_SUPPORT]:
          '这不是一个飞书文档页面,无法下载为 Markdown',
        [TranslationKey.DOWNLOADING_FILE]:
          '下载 {{name}} 中:{{progress}}%(请不要刷新或关闭页面)',
        [TranslationKey.FAILED_TO_DOWNLOAD]: '下载 {{name}} 失败',
        [TranslationKey.STILL_SAVING]: '仍在保存中(请不要刷新或关闭页面)',
        [TranslationKey.DOWNLOAD_PROGRESS]: '{{name}}下载进度:{{progress}}%',
        [TranslationKey.DOWNLOAD_COMPLETE]: '下载完成',
        [TranslationKey.IMAGE]: '图片',
        [TranslationKey.FILE]: '文件',
        [TranslationKey.CANCEL]: '取消',
        [TranslationKey.RETRYING]: '正在重试 ({{attempt}}/{{max}}): {{name}}',
        [TranslationKey.DOCUMENT_TOO_LARGE]: '文档较大,处理可能需要较长时间。',
        [TranslationKey.FAILED_IMAGES]: '{{count}}张图片下载失败,文档可能不完整。',
      },
      // Same merge as above, for the shared `zh` resources.
      ...zh,
    },
  },
})

/** Every file name handed out so far, including generated `-N` variants. */
const usedNames: Set<string> = new Set()
/** Last numeric suffix tried for each original file name. */
const fileNameToPreId: Map<string, number> = new Map()

/**
 * Returns a file name that has not been returned before.
 *
 * The first request for a name returns it unchanged. Later requests insert
 * `-1`, `-2`, ... before the last `.` (or append it when there is no dot).
 * Generated names are recorded too, so a suffixed name can never collide
 * with a file whose real name happens to look like `foo-1.png`.
 *
 * @param originFileName - The desired file name.
 * @returns A name that is unique among all names returned so far.
 */
const uniqueFileName = (originFileName: string) => {
  if (!usedNames.has(originFileName)) {
    usedNames.add(originFileName)
    return originFileName
  }

  const startDotIndex = originFileName.lastIndexOf('.')
  const stem =
    startDotIndex === -1
      ? originFileName
      : originFileName.slice(0, startDotIndex)
  const extension =
    startDotIndex === -1 ? '' : originFileName.slice(startDotIndex)

  // Continue from the last suffix used for this name, then skip any candidate
  // that is already taken (e.g. by a file literally named `foo-1.png`).
  let id = (fileNameToPreId.get(originFileName) ?? 0) + 1
  let fileName = `${stem}-${id}${extension}`
  while (usedNames.has(fileName)) {
    id += 1
    fileName = `${stem}-${id}${extension}`
  }

  fileNameToPreId.set(originFileName, id)
  usedNames.add(fileName)

  return fileName
}

/** Optional callbacks for observing a download. */
interface ProgressOptions {
  /**
   * Called with a progress ratio (done / total); the callers in this file
   * multiply it by 100 for display.
   */
  onProgress?: (progress: number) => void
  /** Called once when the work has finished. */
  onComplete?: () => void
}

/**
 * Reads the body of a fetch `Response` into a `Blob`, reporting progress and
 * enforcing an overall time limit.
 *
 * @param response - The response whose body is read.
 * @param options - `onProgress` receives a ratio in `[0, 1]` (always `0` when
 *   the response has no usable `Content-Length`); `onComplete` is called when
 *   the body has been read completely; `timeout` is the time limit in
 *   milliseconds and defaults to `DOWNLOAD_TIMEOUT`.
 * @returns The complete body as a `Blob`.
 * @throws Error when the response status is not ok, the response has no body,
 *   or the time limit elapses (`'Download timeout'`). Errors raised while
 *   reading the stream are propagated unchanged.
 */
async function toBlob(
  response: Response,
  options: ProgressOptions & { timeout?: number } = {},
): Promise<Blob> {
  if (!response.ok) {
    throw new Error(`HTTP error! status: ${response.status}`)
  }

  if (!response.body) {
    throw new Error('This request has no response body.')
  }

  const { onProgress, onComplete, timeout = DOWNLOAD_TIMEOUT } = options

  const reader = response.body.getReader()
  const contentLength = parseInt(
    response.headers.get('Content-Length') ?? '0',
    10,
  )
  // Without a positive Content-Length the ratio would be Infinity or NaN.
  const hasKnownLength = Number.isFinite(contentLength) && contentLength > 0

  // Set once the caller is no longer waiting (timeout or read failure), so
  // the read loop stops invoking callbacks.
  let abandoned = false

  let timer: ReturnType<typeof setTimeout> | undefined
  const timeoutPromise = new Promise<never>((_, reject) => {
    timer = setTimeout(() => {
      reject(new Error('Download timeout'))
    }, timeout)
  })

  const download = async (): Promise<Blob> => {
    const chunks: BlobPart[] = []
    let receivedLength = 0

    for (;;) {
      const { done, value } = await reader.read()

      if (done) {
        if (!abandoned) onComplete?.()
        break
      }

      chunks.push(value)
      receivedLength += value.length

      if (!abandoned) {
        // Clamp: with a compressed transfer the decoded byte count can exceed
        // the advertised Content-Length.
        onProgress?.(
          hasKnownLength ? Math.min(1, receivedLength / contentLength) : 0,
        )
      }
    }

    return new Blob(chunks)
  }

  let finished = false
  try {
    const blob = await Promise.race([download(), timeoutPromise])
    finished = true
    return blob
  } finally {
    // Always stop the timer so it does not linger after the download settles.
    clearTimeout(timer)

    if (!finished) {
      abandoned = true
      // Stop reading in the background; a cancel failure is irrelevant because
      // the original error is already being propagated.
      reader.cancel().catch(() => undefined)
    }
  }
}

/**
 * Runs `fn`, retrying it when it rejects.
 *
 * Between attempts a loading toast is shown and the helper waits
 * `1000 * attempt` milliseconds (linear backoff). An error named
 * `AbortError` is treated as a deliberate cancellation and is rethrown
 * immediately instead of being retried.
 *
 * @param fn - The asynchronous operation to run.
 * @param options - `maxRetries` is the total number of attempts (defaults to
 *   `MAX_RETRIES`); `name` is shown in the retry toast.
 * @returns The value `fn` resolved with.
 * @throws The last error raised by `fn` (wrapped in `Error` if it was not
 *   one), or a generic `Error` when no attempt was made.
 */
const retry = async <T>(
  fn: () => Promise<T>,
  { maxRetries = MAX_RETRIES, name = '' } = {},
): Promise<T> => {
  let lastError: Error | null = null

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      return await fn()
    } catch (error) {
      lastError = error instanceof Error ? error : new Error(String(error))

      // A cancelled request must not be restarted.
      if (lastError.name === 'AbortError') break

      // Last attempt: give up without waiting.
      if (attempt === maxRetries) break

      const toastKey = `retry-${name}-${attempt}`

      // Tell the user a retry is pending.
      Toast.loading({
        content: i18next.t(TranslationKey.RETRYING, {
          attempt,
          max: maxRetries,
          name,
        }),
        keepAlive: true,
        key: toastKey,
      })

      try {
        // Linear backoff: 1s, 2s, 3s, ...
        await new Promise(resolve => setTimeout(resolve, 1000 * attempt))
      } finally {
        Toast.remove(toastKey)
      }
    }
  }

  if (lastError) throw lastError
  throw new Error('Unknown error during retry')
}

/** Number of images whose download failed; reset by `main` before a run. */
let failedImages = 0

/**
 * Downloads one image (or whiteboard diagram) referenced by the Markdown AST
 * and points `image.url` at its path inside the archive.
 *
 * The archive file name is reserved at most once per image, so retried
 * attempts reuse it instead of consuming `name-1`, `name-2`, ... for files
 * that were never written.
 *
 * @param image - The image node; `image.data` supplies the name and fetchers.
 * @returns The archive path and content, or `null` when the node has no data,
 *   nothing could be fetched, or the download failed. A failure increments
 *   `failedImages` and shows an error toast.
 */
const downloadImage = async (
  image: mdast.Image,
): Promise<DownloadResult | null> => {
  if (!image.data) return null

  const { name: originName, fetchSources, fetchBlob } = image.data

  // Whiteboards may have no name; fall back so toasts never show an empty name.
  const displayName = originName || 'diagram'

  // Reserve the unique name lazily (only once content is known to exist) and
  // only once across all retry attempts.
  let reservedName: string | undefined
  const reserveName = (base: string): string => {
    if (reservedName === undefined) {
      reservedName = uniqueFileName(base)
    }
    return reservedName
  }

  try {
    // whiteboard
    if (fetchBlob) {
      return await retry(async () => {
        const content = await fetchBlob()
        if (!content) return null

        const filename = `images/${reserveName('diagram.png')}`

        image.url = filename

        return {
          filename,
          content,
        }
      }, { name: displayName })
    }

    // image
    if (originName && fetchSources) {
      return await retry(async () => {
        const sources = await fetchSources()

        if (!sources) return null

        const name = reserveName(originName)
        const filename = `images/${name}`

        const { src } = sources
        const response = await fetch(src)
        try {
          const blob = await toBlob(response, {
            onProgress: progress => {
              Toast.loading({
                content: i18next.t(TranslationKey.DOWNLOADING_FILE, {
                  name,
                  progress: Math.floor(progress * 100),
                }),
                keepAlive: true,
                key: filename,
              })
            },
          })

          image.url = filename

          return {
            filename,
            content: blob,
          }
        } finally {
          // Remove the per-file progress toast whether or not the read succeeded.
          Toast.remove(filename)
        }
      }, { name: originName })
    }

    return null
  } catch (error) {
    failedImages++

    Toast.error({
      content: i18next.t(TranslationKey.FAILED_TO_DOWNLOAD, {
        name: displayName,
      }),
      actionText: i18next.t(CommonTranslationKey.CONFIRM_REPORT_BUG, {
        ns: Namespace.COMMON,
      }),
      onActionClick: () => {
        reportBug(error)
      },
    })

    return null
  }
}

/**
 * Downloads one attached file referenced by the Markdown AST and points
 * `file.url` at its path inside the archive.
 *
 * The progress toast offers a cancel action. Cancelling resolves with `null`
 * and is never retried. The archive file name is reserved once, so retried
 * attempts reuse it.
 *
 * @param file - The link node; `file.data` supplies the name and fetcher.
 * @returns The archive path and content, or `null` when the node lacks the
 *   required data, the user cancelled, or the download failed (an error toast
 *   is shown for failures).
 */
const downloadFile = async (
  file: mdast.Link,
): Promise<DownloadResult | null> => {
  if (!file.data || !file.data.name || !file.data.fetchFile) return null

  const { name, fetchFile } = file.data

  // Reserved once, outside the retried callback, so that a retry does not
  // allocate `name-1`, `name-2`, ... for a file that was never stored.
  const filename = `files/${uniqueFileName(name)}`

  const isAbortError = (error: unknown): boolean =>
    error instanceof Error && error.name === 'AbortError'

  try {
    return await retry(async () => {
      const controller = new AbortController()

      const cancel = () => {
        controller.abort()
      }

      try {
        const response = await fetchFile({ signal: controller.signal })
        try {
          const blob = await toBlob(response, {
            onProgress: progress => {
              Toast.loading({
                content: i18next.t(TranslationKey.DOWNLOADING_FILE, {
                  name,
                  progress: Math.floor(progress * 100),
                }),
                keepAlive: true,
                key: filename,
                actionText: i18next.t(TranslationKey.CANCEL),
                onActionClick: cancel,
              })
            },
          })

          file.url = filename

          return {
            filename,
            content: blob,
          }
        } finally {
          Toast.remove(filename)
        }
      } catch (error) {
        // User cancellation: resolve with null so the retry helper stops
        // instead of restarting a download the user just cancelled.
        if (isAbortError(error)) return null

        // Any other failure (e.g. a timeout): abort the request so it does
        // not keep transferring in the background, then let it be retried.
        controller.abort()
        throw error
      }
    }, { name })
  } catch (error) {
    if (isAbortError(error)) {
      return null
    }

    Toast.error({
      content: i18next.t(TranslationKey.FAILED_TO_DOWNLOAD, {
        name,
      }),
      actionText: i18next.t(CommonTranslationKey.CONFIRM_REPORT_BUG, {
        ns: Namespace.COMMON,
      }),
      onActionClick: () => {
        reportBug(error)
      },
    })

    return null
  }
}

/** A downloaded asset ready to be added to the zip archive. */
interface DownloadResult {
  /** Path of the entry inside the archive, e.g. `images/…` or `files/…`. */
  filename: string
  /** The downloaded content. */
  content: Blob
}

/** An AST node that refers to a downloadable asset: an image or a file link. */
type File = mdast.Image | mdast.Link

/**
 * Downloads the given assets in sequential batches, running the downloads
 * inside a batch concurrently.
 *
 * @param files - Image and file-link nodes to download.
 * @param options - `onProgress` receives `finished / total` after every
 *   asset (successful or not); `onComplete` is called after the last batch;
 *   `batchSize` is the number of concurrent downloads per batch.
 * @returns The results of all downloads that produced content, in completion
 *   order.
 */
const downloadFiles = async (
  files: File[],
  options: ProgressOptions & {
    /**
     * @default 5
     */
    batchSize?: number
  } = {},
): Promise<DownloadResult[]> => {
  const { onProgress, onComplete, batchSize = BATCH_SIZE } = options

  // `cluster` needs a positive integer: a fractional size yields uneven
  // batches and a size of 0 makes it throw.
  const safeBatchSize = Number.isFinite(batchSize)
    ? Math.max(1, Math.floor(batchSize))
    : BATCH_SIZE

  const results: DownloadResult[] = []

  const totalSize = files.length
  let downloadedSize = 0

  for (const batch of cluster(files, safeBatchSize)) {
    // allSettled: one failing asset must not stop the rest of its batch.
    await Promise.allSettled(
      batch.map(async file => {
        try {
          const result =
            file.type === 'image'
              ? await downloadImage(file)
              : await downloadFile(file)

          if (result) {
            results.push(result)
          }
        } finally {
          downloadedSize++

          onProgress?.(downloadedSize / totalSize)
        }
      }),
    )
  }

  onComplete?.()

  return results
}

/**
 * Estimates the document size and warns the user when it is very large.
 *
 * The estimate is the length of `JSON.stringify(docx.rootBlock)`, i.e. a
 * count of UTF-16 code units rather than bytes. The check is advisory only:
 * if the block cannot be serialized (`JSON.stringify` throws on circular
 * structures, for example) the document is treated as not large instead of
 * failing the download.
 *
 * @returns `true` when the warning toast was shown, otherwise `false`.
 */
const checkDocumentSize = () => {
  let contentSize = 0

  try {
    contentSize = docx.rootBlock
      ? (JSON.stringify(docx.rootBlock) ?? '').length
      : 0
  } catch {
    // Size could not be determined; never let this heuristic abort a download.
    return false
  }

  // Large-document warning threshold (5 * 1024 * 1024 characters).
  if (contentSize > 5 * 1024 * 1024) {
    Toast.warning({
      content: i18next.t(TranslationKey.DOCUMENT_TOO_LARGE),
      duration: 5000,
    })
    return true
  }

  return false
}

/**
 * Polls every 500 ms until `docx.isReady()` is true and `docx.isLoading()`
 * is false, or until the timeout elapses.
 *
 * Written as an async loop so that an exception thrown by any poll rejects
 * the returned promise; inside a `setTimeout` callback it would escape and
 * leave the promise pending forever.
 *
 * @param timeout - Maximum time to wait, in milliseconds.
 * @returns `true` when the document finished loading, `false` on timeout.
 */
const waitForFullLoad = async (timeout = 10000): Promise<boolean> => {
  const startTime = Date.now()

  for (;;) {
    if (docx.isReady() && !docx.isLoading()) {
      return true
    }

    if (Date.now() - startTime > timeout) {
      return false
    }

    await new Promise<void>(resolve => setTimeout(resolve, 500))
  }
}

/**
 * Entry point: converts the current Lark document to Markdown and saves it,
 * as a single `.md` file or as a `.zip` when the document has images or
 * attached files.
 *
 * @throws Error with message `DOWNLOAD_ABORTED` when the page is not a
 *   supported document, the content did not finish loading in time, or the
 *   user declined the confirmation. In the first two cases a warning toast
 *   has already been shown; rejecting rather than returning keeps the caller
 *   from announcing a completed download.
 */
const main = async () => {
  if (!docx.rootBlock) {
    Toast.warning({ content: i18next.t(TranslationKey.NOT_SUPPORT) })
    throw new Error(DOWNLOAD_ABORTED)
  }

  // Wait until the document has finished loading.
  const isFullyLoaded = await waitForFullLoad()

  if (!isFullyLoaded) {
    Toast.warning({
      content: i18next.t(TranslationKey.CONTENT_LOADING),
    })
    throw new Error(DOWNLOAD_ABORTED)
  }

  // Reset the failed-image counter for this run.
  failedImages = 0

  // Warn the user up front when the document is very large.
  checkDocumentSize()

  const { root, images, files } = docx.intoMarkdownAST({
    whiteboard: true,
    file: true,
  })

  const recommendName = docx.pageTitle
    ? normalizeFileName(docx.pageTitle.slice(0, 100))
    : 'doc'
  const isZip = images.length > 0 || files.length > 0
  const ext = isZip ? '.zip' : '.md'
  const filename = `${recommendName}${ext}`

  // Builds the blob to save. Named differently from the module-level
  // `toBlob` helper to avoid shadowing it.
  const buildContent = async () => {
    Toast.loading({
      content: i18next.t(TranslationKey.STILL_SAVING),
      keepAlive: true,
      key: ToastKey.DOWNLOADING,
    })

    const singleFileContent = () => {
      const markdown = Docx.stringify(root)
      return new Blob([markdown])
    }

    const zipFileContent = async () => {
      const zipFs = new fs.FS()

      const results = await Promise.all([
        downloadFiles(images, {
          batchSize: BATCH_SIZE,
          onProgress: progress => {
            Toast.loading({
              content: i18next.t(TranslationKey.DOWNLOAD_PROGRESS, {
                name: i18next.t(TranslationKey.IMAGE),
                progress: Math.floor(progress * 100),
              }),
              keepAlive: true,
              key: TranslationKey.IMAGE,
            })
          },
          onComplete: () => {
            Toast.remove(TranslationKey.IMAGE)
          },
        }),
        downloadFiles(files, {
          // Attached files use a lower concurrency; keep the size an integer.
          batchSize: Math.max(1, Math.floor(BATCH_SIZE / 2)),
          onProgress: progress => {
            Toast.loading({
              content: i18next.t(TranslationKey.DOWNLOAD_PROGRESS, {
                name: i18next.t(TranslationKey.FILE),
                progress: Math.floor(progress * 100),
              }),
              keepAlive: true,
              key: TranslationKey.FILE,
            })
          },
          onComplete: () => {
            Toast.remove(TranslationKey.FILE)
          },
        }),
      ])

      results.flat(1).forEach(({ filename, content }) => {
        zipFs.addBlob(filename, content)
      })

      // Stringify after the downloads: they rewrite the node URLs to the
      // paths inside the archive.
      const markdown = Docx.stringify(root)

      zipFs.addText(`${recommendName}.md`, markdown)

      return await zipFs.exportBlob()
    }

    const content = isZip ? await zipFileContent() : singleFileContent()

    return content
  }

  if (supported) {
    // If user activation is no longer active, ask the user to confirm before
    // calling `fileSave`.
    if (!navigator.userActivation.isActive) {
      const confirmed = await confirm()
      if (!confirmed) {
        throw new Error(DOWNLOAD_ABORTED)
      }
    }

    // Pass the promise (not the awaited blob) so the save call is made
    // immediately while the content is still being built.
    await fileSave(buildContent(), {
      fileName: filename,
      extensions: [ext],
    })
  } else {
    const blob = await buildContent()

    legacyFileSave(blob, {
      fileName: filename,
    })
  }
}

// Run the download and report the outcome through toasts.
main()
  .then(() => {
    // Warn about images that could not be fetched before announcing completion.
    if (failedImages > 0) {
      const failureNotice = i18next.t(TranslationKey.FAILED_IMAGES, {
        count: failedImages,
      })
      Toast.warning({ content: failureNotice, duration: 5000 })
    }

    const completeNotice = i18next.t(TranslationKey.DOWNLOAD_COMPLETE)
    Toast.success({ content: completeNotice })
  })
  .catch((error: DOMException | TypeError | Error) => {
    // Aborts (from the platform or raised by `main` itself) are not errors.
    const isAbort =
      error.name === 'AbortError' || error.message === DOWNLOAD_ABORTED
    if (isAbort) return

    const reportLabel = i18next.t(CommonTranslationKey.CONFIRM_REPORT_BUG, {
      ns: Namespace.COMMON,
    })
    Toast.error({
      content: String(error),
      actionText: reportLabel,
      onActionClick: () => {
        reportBug(error)
      },
    })
  })
  .finally(() => {
    // Whatever happened, clear the "still saving" toast.
    Toast.remove(ToastKey.DOWNLOADING)
  })

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions