Java实现Word、Excel、PDF文件在线预览

在线预览文档通常有几种方案:转换为HTML、转换为PDF再预览、使用专门的预览服务等。下面我将介绍几种实现方式。


方案一:使用开源库转换为HTML预览

1. 添加Maven依赖

XML 复制代码
<dependencies>
    <!-- Apache POI core and OOXML modules: the preview service imports HSSF (.xls)
         and XSSF / XWPF (.xlsx / .docx) classes from these artifacts -->
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi</artifactId>
        <version>5.2.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-ooxml</artifactId>
        <version>5.2.3</version>
    </dependency>
    
    <!-- POI scratchpad: provides the HWPF classes (HWPFDocument, WordToHtmlConverter)
         used for legacy .doc files -->
    <!-- NOTE(review): the XHTMLConverter imported by the preview service does not appear to
         ship with any artifact listed here; it presumably needs a separate converter
         dependency. Confirm before building. -->
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-scratchpad</artifactId>
        <version>5.2.3</version>
    </dependency>
    
    <!-- PDF handling (PDDocument, PDFTextStripper) -->
    <dependency>
        <groupId>org.apache.pdfbox</groupId>
        <artifactId>pdfbox</artifactId>
        <version>2.0.27</version>
    </dependency>
    
    <!-- File type detection; not referenced by the Java code shown in this article -->
    <dependency>
        <groupId>org.apache.tika</groupId>
        <artifactId>tika-core</artifactId>
        <version>2.4.1</version>
    </dependency>
    
    <!-- Spring Boot Web -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
        <version>2.7.0</version>
    </dependency>
    
    <!-- Template engine (Thymeleaf) -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-thymeleaf</artifactId>
        <version>2.7.0</version>
    </dependency>
</dependencies>

2. 文件预览服务实现

java 复制代码
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.springframework.stereotype.Service;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;

/**
 * Converts Word, Excel and PDF files into HTML strings for in-browser preview.
 *
 * <p>The format is chosen purely from the lower-cased file-name extension; the file content
 * itself is not inspected.
 */
@Service
public class FilePreviewService {

    /**
     * Previews a Word document by converting it to HTML.
     *
     * @param file a {@code .doc} or {@code .docx} file
     * @return the HTML produced by the matching converter
     * @throws IllegalArgumentException if the extension is neither {@code .doc} nor {@code .docx}
     * @throws Exception if the file cannot be read or converted
     */
    public String previewWord(File file) throws Exception {
        String fileName = file.getName().toLowerCase();

        if (fileName.endsWith(".doc")) {
            return convertDocToHtml(file);
        } else if (fileName.endsWith(".docx")) {
            return convertDocxToHtml(file);
        } else {
            throw new IllegalArgumentException("不支持的Word格式");
        }
    }

    /**
     * Previews an Excel workbook by rendering every sheet into one HTML table.
     *
     * @param file a {@code .xls} or {@code .xlsx} file
     * @return an HTML document containing the table
     * @throws IllegalArgumentException if the extension is neither {@code .xls} nor {@code .xlsx}
     * @throws Exception if the file cannot be read or parsed
     */
    public String previewExcel(File file) throws Exception {
        String fileName = file.getName().toLowerCase();

        // Both the stream and the workbook are closed here; the workbook used to leak.
        try (FileInputStream fis = new FileInputStream(file);
             Workbook workbook = openWorkbook(fileName, fis)) {
            return convertExcelToHtml(workbook);
        }
    }

    /**
     * Previews a PDF by extracting its text and wrapping it in a {@code <pre>} element.
     *
     * @param file the PDF file
     * @return an HTML document containing the escaped text
     * @throws Exception if the file cannot be loaded or its text extracted
     */
    public String previewPdf(File file) throws Exception {
        try (PDDocument document = PDDocument.load(file)) {
            PDFTextStripper stripper = new PDFTextStripper();
            String text = stripper.getText(document);

            // Minimal HTML wrapper; the text is escaped so PDF content cannot inject markup.
            return "<html><body><pre>" + escapeHtml(text) + "</pre></body></html>";
        }
    }

    /**
     * Generic entry point: dispatches to the Word, Excel or PDF preview by file extension.
     *
     * @param file the file to preview
     * @return the HTML preview
     * @throws IllegalArgumentException if the extension is not one of the supported formats
     * @throws Exception if the selected converter fails
     */
    public String previewFile(File file) throws Exception {
        String fileName = file.getName().toLowerCase();

        if (fileName.endsWith(".doc") || fileName.endsWith(".docx")) {
            return previewWord(file);
        } else if (fileName.endsWith(".xls") || fileName.endsWith(".xlsx")) {
            return previewExcel(file);
        } else if (fileName.endsWith(".pdf")) {
            return previewPdf(file);
        } else {
            throw new IllegalArgumentException("不支持的文件格式");
        }
    }

    // ---- private conversion helpers ----

    /** Opens the workbook implementation that matches the (already lower-cased) file name. */
    private Workbook openWorkbook(String fileName, InputStream in) throws IOException {
        if (fileName.endsWith(".xls")) {
            return new HSSFWorkbook(in);
        } else if (fileName.endsWith(".xlsx")) {
            return new XSSFWorkbook(in);
        } else {
            throw new IllegalArgumentException("不支持的Excel格式");
        }
    }

    /** Converts a legacy {@code .doc} file to HTML through a DOM built by WordToHtmlConverter. */
    private String convertDocToHtml(File file) throws Exception {
        // The stream is a declared resource so it is closed even if the constructor throws.
        try (FileInputStream in = new FileInputStream(file);
             HWPFDocument document = new HWPFDocument(in)) {
            WordToHtmlConverter converter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()
            );
            converter.processDocument(document);

            Document htmlDocument = converter.getDocument();
            return convertDocumentToString(htmlDocument);
        }
    }

    /** Converts a {@code .docx} file to XHTML using XHTMLConverter with default options. */
    private String convertDocxToHtml(File file) throws Exception {
        try (FileInputStream in = new FileInputStream(file);
             XWPFDocument document = new XWPFDocument(in)) {
            StringWriter writer = new StringWriter();
            XHTMLConverter.getInstance().convert(document, writer, null);
            return writer.toString();
        }
    }

    /**
     * Renders all sheets into a single bordered table: one header row per sheet carrying the
     * sheet name, followed by one table row per spreadsheet row.
     */
    private String convertExcelToHtml(Workbook workbook) {
        StringBuilder html = new StringBuilder();
        html.append("<html><body><table border='1'>");

        for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
            Sheet sheet = workbook.getSheetAt(i);
            // Sheet names come from the uploaded file, so they must be escaped as well.
            html.append("<tr><th colspan='10'>")
                .append(escapeHtml(sheet.getSheetName()))
                .append("</th></tr>");

            for (Row row : sheet) {
                html.append("<tr>");
                for (Cell cell : row) {
                    html.append("<td>");
                    appendCellValue(html, cell);
                    html.append("</td>");
                }
                html.append("</tr>");
            }
        }

        html.append("</table></body></html>");
        return html.toString();
    }

    /**
     * Appends the display value of one cell. Formula cells are rendered using the type of
     * their cached result instead of being left blank.
     */
    private void appendCellValue(StringBuilder html, Cell cell) {
        CellType type = cell.getCellType();
        if (type == CellType.FORMULA) {
            type = cell.getCachedFormulaResultType();
        }

        switch (type) {
            case STRING:
                html.append(escapeHtml(cell.getStringCellValue()));
                break;
            case NUMERIC:
                html.append(cell.getNumericCellValue());
                break;
            case BOOLEAN:
                html.append(cell.getBooleanCellValue());
                break;
            default:
                // Blank, error and any other cell kinds get a placeholder.
                html.append(" ");
        }
    }

    /** Serializes a DOM document as indented HTML without an XML declaration. */
    private String convertDocumentToString(org.w3c.dom.Document doc) throws Exception {
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer transformer = tf.newTransformer();
        transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        transformer.setOutputProperty(OutputKeys.METHOD, "html");
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");

        StringWriter writer = new StringWriter();
        transformer.transform(new DOMSource(doc), new StreamResult(writer));
        return writer.toString();
    }

    /**
     * Escapes the five HTML-significant characters. {@code &} is replaced first so the
     * entities produced by the later replacements are not escaped a second time.
     *
     * @param text the raw text; {@code null} is treated as the empty string
     * @return the escaped text, never {@code null}
     */
    private String escapeHtml(String text) {
        if (text == null) {
            return "";
        }
        return text.replace("&", "&amp;")
                  .replace("<", "&lt;")
                  .replace(">", "&gt;")
                  .replace("\"", "&quot;")
                  .replace("'", "&#39;");
    }
}

3. Spring Boot控制器

java 复制代码
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.Resource;
import org.springframework.core.io.UrlResource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;

import java.io.File;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
 * MVC controller for uploading a document, rendering its HTML preview and downloading the
 * stored original. Files are kept in the relative directory {@code upload-dir}.
 *
 * <p>Every client-supplied file name is resolved through {@link #resolveInsideRoot(String)},
 * so a request cannot read or write outside the upload directory.
 */
@Controller
public class FilePreviewController {

    @Autowired
    private FilePreviewService filePreviewService;

    private final Path rootLocation = Paths.get("upload-dir");

    /**
     * Shows the upload form.
     *
     * @return the {@code upload} view name
     */
    @GetMapping("/")
    public String index() {
        return "upload";
    }

    /**
     * Stores the uploaded file in the upload directory and renders its preview.
     *
     * @param file the uploaded multipart file
     * @param model receives {@code previewContent} and {@code fileName} on success,
     *              or {@code error} on failure
     * @return {@code preview} on success, {@code upload} on any failure
     */
    @PostMapping("/upload")
    public String handleFileUpload(@RequestParam("file") MultipartFile file, Model model) {
        try {
            // Create the upload directory on first use.
            if (!Files.exists(rootLocation)) {
                Files.createDirectories(rootLocation);
            }

            // The original file name is client-controlled and may be null or carry path
            // segments; keep only its last segment before resolving it.
            String originalName = file.getOriginalFilename();
            if (originalName == null || originalName.isEmpty()) {
                throw new IllegalArgumentException("请选择要上传的文件");
            }
            Path leaf = Paths.get(originalName).getFileName();
            if (leaf == null) {
                throw new IllegalArgumentException("非法的文件名");
            }
            String storedName = leaf.toString();

            // Save the file.
            Path destinationFile = resolveInsideRoot(storedName);
            file.transferTo(destinationFile.toFile());

            // Build the preview.
            String htmlContent = filePreviewService.previewFile(destinationFile.toFile());
            model.addAttribute("previewContent", htmlContent);
            // Expose the stored name so the download link points at the saved file.
            model.addAttribute("fileName", storedName);

            return "preview";

        } catch (Exception e) {
            model.addAttribute("error", "文件预览失败: " + e.getMessage());
            return "upload";
        }
    }

    /**
     * Returns the HTML preview of a previously uploaded file.
     *
     * @param filename name of a file inside the upload directory
     * @return 200 with the HTML preview, or 400 with a plain-text message on failure
     */
    @GetMapping("/preview/{filename:.+}")
    @ResponseBody
    public ResponseEntity<String> previewFile(@PathVariable String filename) {
        try {
            Path file = resolveInsideRoot(filename);
            String htmlContent = filePreviewService.previewFile(file.toFile());
            return ResponseEntity.ok()
                .header(HttpHeaders.CONTENT_TYPE, "text/html; charset=utf-8")
                .body(htmlContent);
        } catch (Exception e) {
            // The message may echo request data, so it is sent as plain text, never as HTML.
            return ResponseEntity.badRequest()
                .header(HttpHeaders.CONTENT_TYPE, "text/plain; charset=utf-8")
                .body("预览失败: " + e.getMessage());
        }
    }

    /**
     * Streams a previously uploaded file as an attachment.
     *
     * @param filename name of a file inside the upload directory
     * @return 200 with the file, or 404 if the name is invalid or the file is missing
     */
    @GetMapping("/download/{filename:.+}")
    @ResponseBody
    public ResponseEntity<Resource> downloadFile(@PathVariable String filename) {
        try {
            Path file = resolveInsideRoot(filename);
            Resource resource = new UrlResource(file.toUri());

            // Creating a UrlResource does not fail for a missing file, so check explicitly.
            if (!resource.exists() || !resource.isReadable()) {
                return ResponseEntity.notFound().build();
            }

            return ResponseEntity.ok()
                .header(HttpHeaders.CONTENT_DISPOSITION,
                    "attachment; filename=\"" + resource.getFilename() + "\"")
                .body(resource);
        } catch (Exception e) {
            return ResponseEntity.notFound().build();
        }
    }

    /**
     * Resolves a client-supplied name against the upload directory and rejects anything
     * that ends up outside it (for example {@code ../x} or an absolute path) or that
     * denotes the directory itself.
     *
     * @param filename the client-supplied file name
     * @return the absolute, normalized path inside the upload directory
     * @throws IllegalArgumentException if the name is empty or escapes the upload directory
     */
    private Path resolveInsideRoot(String filename) {
        if (filename == null || filename.isEmpty()) {
            throw new IllegalArgumentException("非法的文件名");
        }
        Path root = rootLocation.toAbsolutePath().normalize();
        Path resolved = root.resolve(filename).normalize();
        if (resolved.equals(root) || !resolved.startsWith(root)) {
            throw new IllegalArgumentException("非法的文件名");
        }
        return resolved;
    }
}

4. HTML模板

src/main/resources/templates/upload.html

html 复制代码
<!DOCTYPE html>
<html xmlns:th="http://www.thymeleaf.org">
<head>
    <meta charset="UTF-8">
    <title>文件上传预览</title>
</head>
<body>
    <h2>文件上传预览</h2>
    
    <!--/* The form must post to /upload explicitly: without an action it posts back to the
           current URL ("/"), which only has a GET mapping. */-->
    <form method="POST" enctype="multipart/form-data" action="/upload" th:action="@{/upload}">
        <input type="file" name="file" accept=".doc,.docx,.xls,.xlsx,.pdf">
        <button type="submit">上传并预览</button>
    </form>
    
    <!--/* Shown only when the controller put an "error" attribute into the model. */-->
    <div th:if="${error}" style="color: red;">
        <p th:text="${error}"></p>
    </div>
</body>
</html>

src/main/resources/templates/preview.html

html 复制代码
<!DOCTYPE html>
<html xmlns:th="http://www.thymeleaf.org">
<head>
    <meta charset="UTF-8">
    <title>文件预览</title>
    <style>
        .preview-container {
            border: 1px solid #ccc;
            padding: 20px;
            margin: 20px 0;
            max-height: 600px;
            overflow: auto;
        }
        .actions {
            margin: 10px 0;
        }
    </style>
</head>
<body>
    <h2>文件预览: <span th:text="${fileName}"></span></h2>
    
    <div class="actions">
        <!--/* The file name is passed as a path variable so Thymeleaf percent-encodes it;
               plain string concatenation breaks on names containing spaces, '#', '?' or '%'. */-->
        <a th:href="@{/download/{name}(name=${fileName})}">下载原文件</a> |
        <a href="/" th:href="@{/}">返回上传</a>
    </div>
    
    <!--/* th:utext inserts the converter output unescaped. NOTE(review): this trusts the HTML
           produced from the uploaded document; confirm that is acceptable. */-->
    <div class="preview-container" th:utext="${previewContent}">
        <!-- The preview content is rendered here -->
    </div>
</body>
</html>

方案二:使用第三方服务(推荐用于生产环境)

对于生产环境,建议使用专门的文档预览服务:

  1. 使用Microsoft Office Online Server

  2. 使用Google Docs预览服务

  3. 使用专业的文档预览SDK(如GroupDocs、Aspose等)

使用Google Docs预览的示例:

java 复制代码
/**
 * Builds the Google Docs viewer URL for a document.
 *
 * <p>The file must first be uploaded to the URL passed in here; that URL is embedded
 * URL-encoded as the {@code url} query parameter.
 *
 * @param fileUrl URL of the document to preview
 * @return the embeddable viewer URL
 * @throws IllegalArgumentException if {@code fileUrl} is null or empty
 */
public String previewWithGoogleDocs(String fileUrl) {
    if (fileUrl == null || fileUrl.isEmpty()) {
        throw new IllegalArgumentException("fileUrl不能为空");
    }
    try {
        return "https://docs.google.com/gview?url=" + URLEncoder.encode(fileUrl, "UTF-8") + "&embedded=true";
    } catch (java.io.UnsupportedEncodingException e) {
        // URLEncoder.encode(String, String) declares this checked exception, so it must be
        // handled for the method to compile; UTF-8 is a charset every JVM must support.
        throw new IllegalStateException("UTF-8 encoding is not available", e);
    }
}

方案三:使用专业的Java库(Aspose)

java 复制代码
// 需要购买许可证
import com.aspose.words.Document;
import com.aspose.words.HtmlSaveOptions;

/**
 * Converts a document to one HTML string with Aspose.Words, inlining images as Base64.
 *
 * @param file the document to convert
 * @return the generated HTML
 * @throws Exception if the document cannot be loaded or saved
 */
public String previewWithAspose(File file) throws Exception {
    Document doc = new Document(file.getAbsolutePath());
    HtmlSaveOptions options = new HtmlSaveOptions();
    options.setExportImagesAsBase64(true);

    // Save into a byte stream and decode it afterwards: Document.save takes a file name or
    // an OutputStream as its target, not a Writer.
    java.io.ByteArrayOutputStream out = new java.io.ByteArrayOutputStream();
    doc.save(out, options);
    // NOTE(review): assumes the HTML is written as UTF-8 (the presumed HtmlSaveOptions
    // default); confirm, or set the encoding explicitly on the options.
    return out.toString("UTF-8");
}

启动类

java 复制代码
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;

/**
 * Spring Boot entry point of the file preview application.
 */
@SpringBootApplication
public class FilePreviewApplication {

    /**
     * Starts the application with this class as the primary configuration source.
     *
     * @param args command-line arguments, passed through to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication application = new SpringApplication(FilePreviewApplication.class);
        application.run(args);
    }
}

配置说明

application.properties中添加:

properties 复制代码
# Upload size limits: per file and per multipart request
spring.servlet.multipart.max-file-size=10MB
spring.servlet.multipart.max-request-size=10MB

# Thymeleaf: template caching disabled; templates are loaded from classpath:/templates/ with the .html suffix
spring.thymeleaf.cache=false
spring.thymeleaf.prefix=classpath:/templates/
spring.thymeleaf.suffix=.html

使用说明

  1. 启动应用后访问 http://localhost:8080

  2. 上传Word、Excel或PDF文件

  3. 系统会自动转换为HTML格式进行预览

注意事项

  1. 性能考虑:大文件转换可能耗时,建议添加进度提示

  2. 内存管理:处理大文件时注意内存使用,建议使用流式处理

  3. 格式兼容性:复杂格式可能无法完美转换

  4. 安全性:对上传文件进行病毒扫描和类型验证

  5. 缓存策略:对已转换的文件进行缓存以提高性能

这种方案适合中小型项目的文档预览需求,对于企业级应用,建议使用专业的文档预览服务。

相关推荐
武子康2 小时前
Java-185 Guava Cache 实战:删除策略、过期机制与常见坑全梳理
java·spring boot·redis·spring·缓存·guava·guava cache
阿杰同学2 小时前
Java 网络协议面试题答案整理,最新面试题
java·开发语言·网络协议
CoderYanger2 小时前
动态规划算法-两个数组的dp(含字符串数组):41.最长公共子序列(模板)
java·算法·leetcode·动态规划·1024程序员节
凌波粒2 小时前
Springboot基础教程(8)--Shiro
java·spring boot·后端
dzl843942 小时前
springboot脚手架备忘
java
feathered-feathered3 小时前
网络套接字——Socket网络编程(TCP编程详解)
java·网络·后端·网络协议·tcp/ip
路边草随风4 小时前
java实现发布spark yarn作业
java·spark·yarn
为爱停留5 小时前
Spring AI实现MCP(Model Context Protocol)详解与实践
java·人工智能·spring
汝生淮南吾在北8 小时前
SpringBoot+Vue饭店点餐管理系统
java·vue.js·spring boot·毕业设计·毕设