无头浏览器chromedriver使用(目前不支持国产操作系统)

1.查询系统架构

bash 复制代码
# Print the machine hardware name (CPU architecture) of this host.
uname -m

若输出为 x86_64,可继续后续步骤;若输出为 aarch64 等 ARM 架构(常见于国产操作系统),则暂时无法使用(官方暂不支持 Linux-arm 架构)

2.服务器上安装操作

无头浏览器服务下载地址(按照系统架构下载)

  1. 把chromedriver-linux64.zip解压出来的chromedriver放到/usr/bin下面

  2. 用ekp账号把chrome-headless-shell-linux64.zip解压到/opt或者其他目录下,然后将解压出的chrome-headless-shell可执行文件的完整路径写入代码中的常量HeadlessBrowserUtil.DEFAULT_HEAD_LESS_SHELL_PATH

java 复制代码
/**
 * Headless-browser helper built on Selenium's {@code ChromeDriver}.
 *
 * <p>Wraps a single driver instance and offers explicit-wait helpers (element, title,
 * attribute and JS "done" markers) plus direct PDF generation as a {@code byte[]}
 * through the CDP command {@code Page.printToPDF}.
 *
 * <p>The class is written against JDK 8 (no switch expressions, text blocks or {@code var}).
 * Instances own a browser process and must be closed, preferably with try-with-resources.
 *
 * <p>The driver and browser binary locations default to the values hard-coded below and can
 * be overridden at JVM start-up with the system properties
 * {@code headless.browser.chromedriver.path} and {@code headless.browser.shell.path}.
 */
public class HeadlessBrowserUtil implements AutoCloseable {

    private static final Logger logger = LoggerFactory.getLogger(HeadlessBrowserUtil.class);

    /** Timeout, in seconds, used when the caller does not supply one. */
    private static final long DEFAULT_TIMEOUT_SECONDS = 30;
    /** Page size used by the no-argument PDF method and the temp-file fallback. */
    private static final String DEFAULT_PDF_PAGE_SIZE = "A4";
    /** Whether background colours/images are printed by default. */
    private static final boolean DEFAULT_PRINT_BACKGROUND = true;
    /** Location of the chromedriver executable (overridable via system property). */
    private static final String DEFAULT_CHROME_DRIVER_PATH =
            System.getProperty("headless.browser.chromedriver.path", "/usr/bin/chromedriver");
    /** Location of the chrome-headless-shell executable (overridable via system property). */
    private static final String DEFAULT_HEAD_LESS_SHELL_PATH =
            System.getProperty("headless.browser.shell.path",
                    "/home/fengxh/DevEnv/chroma/chrome-headless-shell-linux64/chrome-headless-shell");

    private WebDriver driver;
    private WebDriverWait defaultWait;
    // Not final: setDefaultTimeout() must keep this in step with defaultWait, otherwise the
    // helpers that build their own waits would keep using the constructor-time value.
    private long defaultTimeoutSeconds;

    // ========== Constructors ==========

    /** Creates a browser with the built-in options and the default timeout. */
    public HeadlessBrowserUtil() {
        this(DEFAULT_TIMEOUT_SECONDS);
    }

    /**
     * Creates a browser with the built-in options.
     *
     * @param defaultTimeoutSeconds timeout, in seconds, for the default waits
     * @throws RuntimeException if the driver cannot be started
     */
    public HeadlessBrowserUtil(long defaultTimeoutSeconds) {
        this.defaultTimeoutSeconds = defaultTimeoutSeconds;
        initDriver();
    }

    /**
     * Creates a browser from caller-supplied options and the default timeout.
     *
     * @param customOptions options handed to {@code ChromeDriver} unchanged
     * @throws RuntimeException if the driver cannot be started
     */
    public HeadlessBrowserUtil(ChromeOptions customOptions) {
        this(customOptions, DEFAULT_TIMEOUT_SECONDS);
    }

    /**
     * Creates a browser from caller-supplied options.
     *
     * @param customOptions         options handed to {@code ChromeDriver} unchanged
     * @param defaultTimeoutSeconds timeout, in seconds, for the default waits
     * @throws RuntimeException if the driver cannot be started
     */
    public HeadlessBrowserUtil(ChromeOptions customOptions, long defaultTimeoutSeconds) {
        this.defaultTimeoutSeconds = defaultTimeoutSeconds;
        try {
            this.driver = new ChromeDriver(customOptions);
            this.defaultWait = new WebDriverWait(driver, Duration.ofSeconds(defaultTimeoutSeconds));
            logger.info("ChromeDriver初始化成功(自定义Options)");
        } catch (Exception e) {
            logger.error("ChromeDriver初始化失败(自定义Options)", e);
            // Do not leave a browser process behind if the failure happened after start-up.
            quitQuietly(this.driver);
            this.driver = null;
            throw new RuntimeException("ChromeDriver初始化失败", e);
        }
    }

    // ========== Initialisation ==========

    /**
     * Starts the driver with the built-in option set and creates the default wait.
     *
     * @throws RuntimeException if the driver cannot be started; details are logged first
     */
    private void initDriver() {
        ChromeOptions options = new ChromeOptions();
        // Point Selenium at an explicit chromedriver so an older one on the system is not picked up.
        System.setProperty("webdriver.chrome.driver", DEFAULT_CHROME_DRIVER_PATH);

        // Option set chosen for the version-144 build this class was written against.
        options.setBinary(DEFAULT_HEAD_LESS_SHELL_PATH);
        options.addArguments("--headless=new");
        options.addArguments("--disable-gpu");           // Linux hosts without a GPU
        options.addArguments("--window-size=1920,1080");
        options.addArguments("--remote-allow-origins=*");
        options.addArguments("--disable-dev-shm-usage"); // avoid running out of /dev/shm on Linux
        // Disables the Chrome sandbox. NOTE(review): the original note said this is required for
        // non-root users, which looks inverted (it is usually needed when running as root or in
        // containers) - confirm for the target deployment.
        options.addArguments("--no-sandbox");
        options.addArguments("--disable-extensions");
        options.addArguments("--disable-blink-features=AutomationControlled");
        options.setExperimentalOption("excludeSwitches", new String[]{"enable-automation"});

        try {
            this.driver = new ChromeDriver(options);
            this.defaultWait = new WebDriverWait(driver, Duration.ofSeconds(defaultTimeoutSeconds));
            logger.info("ChromeDriver初始化成功,默认超时时间={}秒,ChromeDriver路径={}",
                    defaultTimeoutSeconds, DEFAULT_CHROME_DRIVER_PATH);
        } catch (Exception e) {
            // Log the paths alongside the stack trace: a wrong path or version is the usual cause.
            logger.error("===== ChromeDriver初始化失败详情 =====", e);
            logger.error("1. ChromeDriver路径:{}", DEFAULT_CHROME_DRIVER_PATH);
            logger.error("2. Headless Shell路径:{}", DEFAULT_HEAD_LESS_SHELL_PATH);
            logger.error("3. 异常类型:{}", e.getClass().getName());
            logger.error("4. 异常信息:{}", e.getMessage());
            quitQuietly(this.driver);
            this.driver = null;
            throw new RuntimeException("无头浏览器初始化失败,请检查日志中路径/版本/依赖", e);
        }
    }

    /** Quits the given driver, logging instead of propagating any failure; null is ignored. */
    private static void quitQuietly(WebDriver target) {
        if (target == null) {
            return;
        }
        try {
            target.quit();
        } catch (Exception e) {
            logger.warn("ChromeDriver关闭失败", e);
        }
    }

    /** Returns true when {@code document.readyState} of the given driver's page is "complete". */
    private static boolean isDocumentComplete(WebDriver webDriver) {
        Object state = ((JavascriptExecutor) webDriver).executeScript("return document.readyState");
        return "complete".equals(state);
    }

    // ========== PDF generation ==========

    /**
     * Renders the current page to PDF with the default page size and background setting,
     * keeping the default margins.
     *
     * @return the PDF bytes
     * @throws RuntimeException if rendering fails
     */
    public byte[] generatePdfAsBytes() {
        return generatePdfAsBytes(DEFAULT_PDF_PAGE_SIZE, DEFAULT_PRINT_BACKGROUND, false);
    }

    /**
     * Renders the current page to PDF through the CDP command {@code Page.printToPDF}.
     * No temporary file is involved; the result is returned as Base64 and decoded here.
     *
     * @param pageSize        A4 / A3 / Letter / Legal (case-insensitive); anything else,
     *                        including null, falls back to A4
     * @param printBackground whether background colours/images are printed
     * @param removeMargins   true for zero margins, false for 0.4 inch on every side
     * @return the PDF bytes
     * @throws RuntimeException if waiting for the page, the CDP call or decoding fails
     */
    public byte[] generatePdfAsBytes(String pageSize, boolean printBackground, boolean removeMargins) {
        try {
            // Make sure the document and its images are ready before printing.
            waitForPageLoad();
            waitForAllImagesLoaded();
            logger.info("页面渲染完成,开始通过CDP生成PDF");

            double marginInches = removeMargins ? 0 : 0.4;
            Map<String, Object> printParams = new HashMap<>();
            printParams.put("paperWidth", getPageWidth(pageSize));   // inches
            printParams.put("paperHeight", getPageHeight(pageSize)); // inches
            printParams.put("printBackground", printBackground);
            printParams.put("marginTop", marginInches);
            printParams.put("marginBottom", marginInches);
            printParams.put("marginLeft", marginInches);
            printParams.put("marginRight", marginInches);
            printParams.put("preferCSSPageSize", true); // a CSS @page size wins over paperWidth/Height
            printParams.put("transferMode", "ReturnAsBase64");

            ChromeDriver chromeDriver = (ChromeDriver) this.driver;
            Map<String, Object> result = chromeDriver.executeCdpCommand("Page.printToPDF", printParams);
            Object data = result == null ? null : result.get("data");
            if (!(data instanceof String)) {
                // Fail with a clear cause instead of an NPE from the decoder.
                throw new IllegalStateException("Page.printToPDF未返回PDF数据");
            }

            byte[] pdfBytes = java.util.Base64.getDecoder().decode((String) data);
            logger.info("PDF生成成功,字节数={}", pdfBytes.length);
            return pdfBytes;
        } catch (Exception e) {
            logger.error("生成PDF字节数组失败", e);
            throw new RuntimeException("PDF生成失败", e);
        }
    }

    /**
     * Injects the given HTML into a blank page and renders it to PDF with default settings.
     *
     * @param htmlContent HTML markup to render
     * @return the PDF bytes
     * @throws RuntimeException if injection or rendering fails
     */
    public byte[] generatePdfFromHtml(String htmlContent) {
        injectHtml(htmlContent);
        return generatePdfAsBytes();
    }

    // ========== Helpers ==========

    /**
     * Opens {@code about:blank}, writes the given HTML into the document and waits until the
     * document and all images report as loaded.
     *
     * @param htmlContent HTML markup to write
     * @return this instance, for chaining
     * @throws RuntimeException if navigation, injection or waiting fails
     */
    public HeadlessBrowserUtil injectHtml(String htmlContent) {
        try {
            get("about:blank").waitForPageLoad();
            // document.close() ends the write stream so the page can reach readyState "complete".
            executeScript("document.write(arguments[0]); document.close();", htmlContent);
            logger.info("HTML字符串注入完成,开始等待渲染");
            waitForPageLoad();
            waitForAllImagesLoaded();
            logger.info("HTML渲染完成");
            return this;
        } catch (Exception e) {
            logger.error("HTML注入/渲染失败", e);
            throw new RuntimeException("HTML注入失败", e);
        }
    }

    /**
     * Blocks until every entry of {@code document.images} reports {@code complete}, or the
     * current default timeout elapses.
     *
     * <p>The script returns a number (1 = done, 0 = not yet) rather than a boolean, and the
     * Java side reads it as {@link Number} so any numeric wrapper type is accepted.
     * NOTE(review): the script also tests {@code images[i].error}; whether image elements
     * expose such a property is not established here - confirm or drop that test.
     */
    private void waitForAllImagesLoaded() {
        waitForCondition((Function<WebDriver, Boolean>) webDriver -> {
            Number result = (Number) ((JavascriptExecutor) webDriver).executeScript(
                    "var images = document.images;" +
                            "if (images.length === 0) return 1;" +
                            "var loaded = 0;" +
                            "for (var i = 0; i < images.length; i++) {" +
                            "    if (images[i].complete && !images[i].error) loaded++;" +
                            "}" +
                            "return loaded === images.length ? 1 : 0;"
            );
            return result != null && result.intValue() == 1;
        }, defaultTimeoutSeconds);
    }

    /** Upper-cases a page-size name locale-independently; null maps to the default page size. */
    private static String normalizePageSize(String pageSize) {
        return pageSize == null ? DEFAULT_PDF_PAGE_SIZE : pageSize.toUpperCase(java.util.Locale.ROOT);
    }

    /**
     * Returns the paper width in inches for a page-size name.
     *
     * @param pageSize A4 / A3 / Letter / Legal (case-insensitive); others, and null, mean A4
     */
    private double getPageWidth(String pageSize) {
        switch (normalizePageSize(pageSize)) {
            case "A3":
                return 11.69;
            case "LETTER":
            case "LEGAL":
                return 8.5;
            case "A4":
            default:
                return 8.27;
        }
    }

    /**
     * Returns the paper height in inches for a page-size name.
     *
     * @param pageSize A4 / A3 / Letter / Legal (case-insensitive); others, and null, mean A4
     */
    private double getPageHeight(String pageSize) {
        switch (normalizePageSize(pageSize)) {
            case "A3":
                return 16.54;
            case "LETTER":
                return 11.0;
            case "LEGAL":
                return 14.0;
            case "A4":
            default:
                return 11.69;
        }
    }

    // ========== Fallback: PDF through a temporary file ==========

    /**
     * Fallback that starts a second, temporary driver with print-to-PDF command-line switches,
     * opens the current URL in it and reads the resulting file.
     *
     * <p>Only the URL of the current page is re-opened; content that was written into the page
     * after navigation (for example by {@link #injectHtml(String)}) is not carried over.
     * The temporary driver is always quit and the file always removed, even when quitting fails.
     * NOTE(review): the print switches are passed verbatim; whether the browser honours them
     * while driven by WebDriver cannot be verified from this file.
     *
     * @param tempPdfPath path the browser is asked to write the PDF to; deleted afterwards
     * @return the PDF bytes
     * @throws RuntimeException if the page cannot be loaded or the file is missing or empty
     */
    public byte[] generatePdfByTempFile(String tempPdfPath) {
        ChromeOptions options = new ChromeOptions();
        options.addArguments("--headless=new");
        options.addArguments("--disable-gpu");
        options.addArguments("--print-to-pdf=" + tempPdfPath);
        options.addArguments("--print-to-pdf-page-size=" + DEFAULT_PDF_PAGE_SIZE);
        options.addArguments("--print-backgrounds=" + DEFAULT_PRINT_BACKGROUND);
        options.addArguments("--no-margins");

        // Command-line switches only take effect at start-up, hence a fresh driver.
        WebDriver tempDriver = new ChromeDriver(options);
        try {
            tempDriver.get(this.driver.getCurrentUrl());
            WebDriverWait tempWait = new WebDriverWait(tempDriver, Duration.ofSeconds(defaultTimeoutSeconds));
            tempWait.until(webDriver -> isDocumentComplete(webDriver));

            File pdfFile = new File(tempPdfPath);
            if (!pdfFile.exists() || pdfFile.length() == 0) {
                throw new RuntimeException("临时PDF文件生成失败:" + tempPdfPath);
            }
            byte[] pdfBytes = Files.readAllBytes(pdfFile.toPath());
            logger.info("临时文件方式生成PDF成功,字节数={}", pdfBytes.length);
            return pdfBytes;
        } catch (Exception e) {
            logger.error("临时文件方式生成PDF失败", e);
            throw new RuntimeException("PDF生成失败", e);
        } finally {
            // A failing quit() must not prevent the file cleanup below.
            quitQuietly(tempDriver);
            File leftover = new File(tempPdfPath);
            if (leftover.exists() && !leftover.delete()) {
                logger.warn("临时PDF文件清理失败:{}", tempPdfPath);
            }
        }
    }

    // ========== Navigation, scripting and waits ==========

    /**
     * Navigates to the given URL.
     *
     * @return this instance, for chaining
     * @throws RuntimeException if navigation fails
     */
    public HeadlessBrowserUtil get(String url) {
        try {
            driver.get(url);
            logger.debug("访问URL:{}", url);
            return this;
        } catch (Exception e) {
            logger.error("访问URL失败:{}", url, e);
            throw new RuntimeException("访问URL失败", e);
        }
    }

    /**
     * Runs a synchronous script in the current page.
     *
     * @param script JavaScript source
     * @param args   values exposed to the script as {@code arguments[i]}
     * @return whatever the driver returns for the script
     * @throws RuntimeException if execution fails; the first 100 characters of the script are logged
     */
    public Object executeScript(String script, Object... args) {
        try {
            return ((JavascriptExecutor) driver).executeScript(script, args);
        } catch (Exception e) {
            // Guard against a null script so the logging itself cannot mask the real failure.
            String preview = script == null ? "null" : script.substring(0, Math.min(script.length(), 100));
            logger.error("执行JS脚本失败:{}", preview, e);
            throw new RuntimeException("JS脚本执行失败", e);
        }
    }

    /**
     * Runs an asynchronous script in the current page.
     *
     * @param script JavaScript source
     * @param args   values exposed to the script as {@code arguments[i]}
     * @return whatever the driver returns for the script
     * @throws RuntimeException if execution fails
     */
    public Object executeAsyncScript(String script, Object... args) {
        try {
            return ((JavascriptExecutor) driver).executeAsyncScript(script, args);
        } catch (Exception e) {
            logger.error("执行异步JS脚本失败", e);
            throw new RuntimeException("异步JS脚本执行失败", e);
        }
    }

    /** Waits, with the default wait, until an element matching the locator is present. */
    public WebElement waitForElement(By locator) {
        return defaultWait.until(ExpectedConditions.presenceOfElementLocated(locator));
    }

    /** Waits up to {@code timeoutSeconds} until an element matching the locator is present. */
    public WebElement waitForElement(By locator, long timeoutSeconds) {
        return new WebDriverWait(driver, Duration.ofSeconds(timeoutSeconds))
                .until(ExpectedConditions.presenceOfElementLocated(locator));
    }

    /** Waits, with the default wait, until an element matching the locator is visible. */
    public WebElement waitForElementVisible(By locator) {
        return defaultWait.until(ExpectedConditions.visibilityOfElementLocated(locator));
    }

    /** Waits, with the default wait, until an element matching the locator is clickable. */
    public WebElement waitForElementClickable(By locator) {
        return defaultWait.until(ExpectedConditions.elementToBeClickable(locator));
    }

    /** Waits, with the default wait, until the located element's text contains {@code text}. */
    public boolean waitForTextContains(By locator, String text) {
        return defaultWait.until(ExpectedConditions.textToBePresentInElementLocated(locator, text));
    }

    /** Waits, with the default wait, until the located element's attribute equals {@code value}. */
    public boolean waitForAttributeValue(By locator, String attribute, String value) {
        return defaultWait.until(ExpectedConditions.attributeToBe(locator, attribute, value));
    }

    /** Waits, with the default wait, until the page title contains {@code title}. */
    public boolean waitForTitleContains(String title) {
        return defaultWait.until(ExpectedConditions.titleContains(title));
    }

    /** Waits, with the default wait, until the page title equals {@code title}. */
    public boolean waitForTitleIs(String title) {
        return defaultWait.until(ExpectedConditions.titleIs(title));
    }

    /** Waits until the element with the given id is present and returns it. */
    public WebElement waitForJsCompleteMarker(String markerId) {
        return waitForElement(By.id(markerId));
    }

    /**
     * Waits for the marker element and reads two counters from its attributes.
     *
     * @param markerId    id of the marker element
     * @param successAttr attribute holding the first counter
     * @param failAttr    attribute holding the second counter
     * @return {@code {success, fail}}; a missing attribute counts as 0
     * @throws NumberFormatException if an attribute is present but not an integer
     */
    public int[] waitForJsCompleteWithResult(String markerId, String successAttr, String failAttr) {
        WebElement marker = waitForElement(By.id(markerId));
        String success = marker.getAttribute(successAttr);
        String fail = marker.getAttribute(failAttr);
        return new int[]{
                Integer.parseInt(success != null ? success : "0"),
                Integer.parseInt(fail != null ? fail : "0")
        };
    }

    /**
     * Waits until the element with the given id has a non-empty {@code value} attribute.
     *
     * @param inputId id of the element to watch
     * @return the non-empty value that ended the wait
     */
    public String waitForJsCompleteSimple(String inputId) {
        final WebElement input = waitForElement(By.id(inputId));
        // Returning the observed value from the wait avoids a second read that could differ.
        return defaultWait.until((Function<WebDriver, String>) d -> {
            String val = input.getAttribute("value");
            return (val != null && !val.isEmpty()) ? val : null;
        });
    }

    /**
     * Waits until {@code <body>} carries {@code data-<attributeName>} equal to the expected value.
     *
     * @param attributeName attribute name without the {@code data-} prefix
     */
    public boolean waitForBodyDataAttribute(String attributeName, String expectedValue) {
        String fullAttr = "data-" + attributeName;
        return defaultWait.until(ExpectedConditions.attributeToBe(By.tagName("body"), fullAttr, expectedValue));
    }

    /**
     * Polls an arbitrary condition every 500 ms until it yields a value the wait accepts or
     * the timeout elapses. Missing and stale elements are ignored while polling.
     *
     * @param condition      condition evaluated against the driver
     * @param timeoutSeconds maximum time to wait, in seconds
     * @return the value produced by the condition
     */
    public <T> T waitForCondition(Function<WebDriver, T> condition, long timeoutSeconds) {
        return new FluentWait<>(driver)
                .withTimeout(Duration.ofSeconds(timeoutSeconds))
                .pollingEvery(Duration.ofMillis(500))
                .ignoring(NoSuchElementException.class, StaleElementReferenceException.class)
                .until(condition);
    }

    // ========== Thin delegates to the driver ==========

    /** Finds the first element matching the locator, without waiting. */
    public WebElement findElement(By locator) {
        return driver.findElement(locator);
    }

    /** Finds all elements matching the locator, without waiting. */
    public List<WebElement> findElements(By locator) {
        return driver.findElements(locator);
    }

    /** Waits until the element is clickable, then clicks it. */
    public HeadlessBrowserUtil click(By locator) {
        waitForElementClickable(locator).click();
        return this;
    }

    /** Waits until the element is visible, clears it and types {@code text}. */
    public HeadlessBrowserUtil sendKeys(By locator, String text) {
        WebElement element = waitForElementVisible(locator);
        element.clear();
        element.sendKeys(text);
        return this;
    }

    /** Waits until the element is visible and returns its text. */
    public String getText(By locator) {
        return waitForElementVisible(locator).getText();
    }

    /** Returns the page source reported by the driver. */
    public String getPageSource() {
        return driver.getPageSource();
    }

    /** Returns the current URL reported by the driver. */
    public String getCurrentUrl() {
        return driver.getCurrentUrl();
    }

    /** Returns the page title reported by the driver. */
    public String getTitle() {
        return driver.getTitle();
    }

    /** Reloads the current page. */
    public HeadlessBrowserUtil refresh() {
        driver.navigate().refresh();
        return this;
    }

    /** Navigates one step back in history. */
    public HeadlessBrowserUtil back() {
        driver.navigate().back();
        return this;
    }

    /** Navigates one step forward in history. */
    public HeadlessBrowserUtil forward() {
        driver.navigate().forward();
        return this;
    }

    /** Takes a screenshot and returns it as bytes. */
    public byte[] takeScreenshot() {
        return ((TakesScreenshot) driver).getScreenshotAs(OutputType.BYTES);
    }

    /** Takes a screenshot and returns the file the driver produced. */
    public File takeScreenshotAsFile() {
        return ((TakesScreenshot) driver).getScreenshotAs(OutputType.FILE);
    }

    /** Waits for the frame element and switches the driver into it. */
    public HeadlessBrowserUtil switchToFrame(By locator) {
        driver.switchTo().frame(waitForElement(locator));
        return this;
    }

    /** Switches the driver back to the top-level document. */
    public HeadlessBrowserUtil switchToDefaultContent() {
        driver.switchTo().defaultContent();
        return this;
    }

    /**
     * Sends Ctrl+P to the page.
     *
     * @deprecated has no effect in headless mode; use {@link #generatePdfAsBytes()} instead
     */
    @Deprecated
    public HeadlessBrowserUtil triggerPrint() {
        logger.warn("triggerPrint()方法在无头模式下无效,请使用generatePdfAsBytes()生成PDF");
        Actions actions = new Actions(driver);
        actions.keyDown(Keys.CONTROL).sendKeys("p").keyUp(Keys.CONTROL).perform();
        return this;
    }

    /**
     * Sleeps for the given number of seconds. If interrupted, the interrupt flag is restored
     * and the method returns early.
     */
    public HeadlessBrowserUtil sleep(long seconds) {
        try {
            Thread.sleep(seconds * 1000);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
        return this;
    }

    /** Waits, with the default wait, until {@code document.readyState} is "complete". */
    public HeadlessBrowserUtil waitForPageLoad() {
        defaultWait.until(webDriver -> isDocumentComplete(webDriver));
        return this;
    }

    /**
     * Waits, with the default wait, until {@code typeof <libraryName>} is not "undefined".
     *
     * @param libraryName global identifier to test; it is concatenated into the script
     *                    verbatim, so it must be a trusted, valid JavaScript expression
     */
    public HeadlessBrowserUtil waitForLibraryLoad(String libraryName) {
        defaultWait.until(webDriver -> Boolean.TRUE.equals(((JavascriptExecutor) webDriver)
                .executeScript("return typeof " + libraryName + " !== 'undefined'")));
        return this;
    }

    /** Quits the driver if one is held. Failures are logged, never thrown; safe to call twice. */
    @Override
    public void close() {
        if (driver != null) {
            try {
                driver.quit();
                logger.info("ChromeDriver已正常关闭");
            } catch (Exception e) {
                logger.error("ChromeDriver关闭失败", e);
            } finally {
                driver = null;
            }
        }
    }

    /** Returns the underlying driver, or null after {@link #close()}. */
    public WebDriver getDriver() {
        return driver;
    }

    /**
     * Replaces the default timeout used by the default wait and by the helpers that build
     * their own waits from it.
     *
     * @param seconds new timeout in seconds
     * @return this instance, for chaining
     */
    public HeadlessBrowserUtil setDefaultTimeout(long seconds) {
        this.defaultTimeoutSeconds = seconds;
        this.defaultWait = new WebDriverWait(driver, Duration.ofSeconds(seconds));
        return this;
    }
}
  3. 相关jar下载地址(按项目实际情况增删依赖:缺少的补上,多余的去掉)

3.测试验证

  1. 在chrome-headless-shell-linux64.zip解压路径下运行
  2. 测试访问网页并生成PDF(纯无头模式)
bash 复制代码
# Render https://www.baidu.com to /tmp/test.pdf using the plain --headless flag.
./chrome-headless-shell \
  --headless \
  --no-sandbox \
  --disable-gpu \
  --print-to-pdf=/tmp/test.pdf \
  https://www.baidu.com
bash 复制代码
# Render https://www.baidu.com to /tmp/test.pdf using the --headless=new flag.
./chrome-headless-shell \
  --headless=new \
  --no-sandbox \
  --disable-gpu \
  --print-to-pdf=/tmp/test.pdf \
  https://www.baidu.com
  3. 检查 /tmp/test.pdf 是否已生成且不为空;若是,则验证成功
相关推荐
zhishidi3 小时前
使用python给pdf文档自动添加目录书签
java·python·pdf
WiChP11 小时前
【V0.1B5】从零开始的2D游戏引擎开发之路
java·服务器·数据库
cch891811 小时前
汇编与Java:底层与高层的编程对决
java·开发语言·汇编
荒川之神12 小时前
拉链表概念与基本设计
java·开发语言·数据库
Highcharts.js12 小时前
适合报表系统的可视化图表|Highcharts支持直接导出PNG和PDF
javascript·数据库·react.js·pdf
cch891812 小时前
汇编与Go:底层到高层的编程差异
java·汇编·golang
chushiyunen12 小时前
python中的@Property和@Setter
java·开发语言·python
禾小西12 小时前
Java中使用正则表达式核心解析
java·python·正则表达式