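1、添加 Maven 依赖(pom.xml)。注意:2.2 的实现类还导入了 technology.tabula 包,但此处未列出对应依赖;extractText 并未用到这些导入,若保留它们则需另行引入 tabula 依赖,否则无法编译。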
<!-- Apache PDFBox: supplies the org.apache.pdfbox classes (Loader, PDDocument, PDFTextStripper) imported by the service implementation in this guide. -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>3.0.7</version>
<scope>compile</scope>
</dependency>
2、建立service和实现类
2.1 IDrawingPdfService接口
/**
 * Service contract for extracting text from an uploaded file.
 * The implementation shown in this guide parses the upload as a PDF document.
 */
public interface IDrawingPdfService {
/**
 * Extracts the text content of the given upload.
 *
 * @param file the uploaded multipart file to read
 * @return the extracted text
 */
String extractText(MultipartFile file);
}
2.2 DrawingPdfServiceImpl实现类
package com.example.flowable_eng.service.impl;
import com.example.flowable_eng.service.IDrawingPdfService;
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.text.PDFTextStripper;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import technology.tabula.*;
import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.stream.Collectors;
@Service
@Slf4j
public class DrawingPdfServiceImpl implements IDrawingPdfService {
@Override
public String extractText(MultipartFile file) {
try (InputStream inputStream = file.getInputStream();
PDDocument document = Loader.loadPDF(inputStream.readAllBytes())) {
PDFTextStripper stripper = new PDFTextStripper();
stripper.setSortByPosition(true);
return stripper.getText(document);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
3、控制器测试
package com.example.flowable_eng.controller;
import com.example.flowable_eng.service.IDrawingPdfService;
import jakarta.annotation.Resource;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import java.util.List;
import java.util.Map;
/**
 * REST controller exposing the PDF text extraction service over HTTP.
 */
@RestController
public class DrawingPdfController {

    /** Extraction service this controller delegates to; injected via {@code @Resource}. */
    @Resource
    private IDrawingPdfService drawingPdfService;

    /**
     * Handles {@code POST /extractText}: passes the uploaded file to the service
     * and returns the extracted text as the response body.
     *
     * @param file the multipart upload bound from the {@code file} request parameter
     * @return the text returned by {@link IDrawingPdfService#extractText(MultipartFile)}
     */
    @PostMapping("/extractText")
    public String extractText(@RequestParam("file") MultipartFile file) {
        final String extractedText = drawingPdfService.extractText(file);
        return extractedText;
    }
}