From c1e6dc67b114ea95ae155032edd25004a6e0cae8 Mon Sep 17 00:00:00 2001 From: wangchunxiang <526328077@qq.com> Date: Sat, 11 Oct 2025 15:31:24 +0800 Subject: [PATCH] =?UTF-8?q?feat(playwright):=20=E6=B7=BB=E5=8A=A0Cookie?= =?UTF-8?q?=E6=8B=A6=E6=88=AA=E7=A4=BA=E4=BE=8B=E5=92=8C=E7=99=BB=E5=BD=95?= =?UTF-8?q?=E7=A4=BA=E4=BE=8B=E5=8A=9F=E8=83=BD=E6=96=B0=E5=A2=9EPlaywrigh?= =?UTF-8?q?tService=E6=9C=8D=E5=8A=A1=EF=BC=8C=E6=8F=90=E4=BE=9BCookie?= =?UTF-8?q?=E6=8B=A6=E6=88=AA=E4=B8=8E=E6=89=8B=E5=8A=A8=E7=99=BB=E5=BD=95?= =?UTF-8?q?=E7=A4=BA=E4=BE=8B=E3=80=82=E6=96=B0=E5=A2=9EDesktopApplication?= =?UTF-8?q?=E6=A1=8C=E9=9D=A2=E5=BA=94=E7=94=A8=E5=85=A5=E5=8F=A3=EF=BC=8C?= =?UTF-8?q?=E9=9B=86=E6=88=90JavaFX=E7=95=8C=E9=9D=A2=E3=80=82=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0=E7=9B=B8=E5=85=B3=E4=BE=9D=E8=B5=96=EF=BC=9APlaywrigh?= =?UTF-8?q?t=E3=80=81JavaFX=E3=80=81Spring=20Boot=E7=AD=89=E3=80=82=20?= =?UTF-8?q?=E9=85=8D=E7=BD=AEMaven=E6=8F=92=E4=BB=B6=E6=94=AF=E6=8C=81Java?= =?UTF-8?q?FX=E5=BA=94=E7=94=A8=E6=89=93=E5=8C=85=E3=80=82=20=E5=AE=8C?= =?UTF-8?q?=E5=96=84README=E6=96=87=E6=A1=A3=EF=BC=8C=E8=AF=B4=E6=98=8E?= =?UTF-8?q?=E9=A1=B9=E7=9B=AE=E7=BB=93=E6=9E=84=E4=B8=8E=E8=BF=90=E8=A1=8C?= =?UTF-8?q?=E6=96=B9=E5=BC=8F=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .idea/.gitignore | 8 + .idea/compiler.xml | 27 ++++ .idea/encodings.xml | 7 + .idea/git_toolbox_blame.xml | 6 + .idea/inspectionProfiles/Project_Default.xml | 5 + .idea/jarRepositories.xml | 20 +++ .idea/misc.xml | 15 ++ .idea/vcs.xml | 6 + README.md | 34 ++++- pom.xml | 142 +++++++++++++++++ .../com/fantaibao/DesktopApplication.java | 95 ++++++++++++ .../PlaywrightInterceptCookieExample.java | 68 +++++++++ src/main/java/com/fantaibao/Test.java | 55 +++++++ .../fantaibao/service/PlaywrightService.java | 143 ++++++++++++++++++ src/main/java/module-info.java | 16 ++ src/main/resources/application.yml | 16 ++ 16 files changed, 662 insertions(+), 1 deletion(-) create 
mode 100644 .idea/.gitignore create mode 100644 .idea/compiler.xml create mode 100644 .idea/encodings.xml create mode 100644 .idea/git_toolbox_blame.xml create mode 100644 .idea/inspectionProfiles/Project_Default.xml create mode 100644 .idea/jarRepositories.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/vcs.xml create mode 100644 pom.xml create mode 100644 src/main/java/com/fantaibao/DesktopApplication.java create mode 100644 src/main/java/com/fantaibao/PlaywrightInterceptCookieExample.java create mode 100644 src/main/java/com/fantaibao/Test.java create mode 100644 src/main/java/com/fantaibao/service/PlaywrightService.java create mode 100644 src/main/java/module-info.java create mode 100644 src/main/resources/application.yml diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/compiler.xml b/.idea/compiler.xml new file mode 100644 index 0000000..2b262f4 --- /dev/null +++ b/.idea/compiler.xml @@ -0,0 +1,27 @@ + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml new file mode 100644 index 0000000..aa00ffa --- /dev/null +++ b/.idea/encodings.xml @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/.idea/git_toolbox_blame.xml b/.idea/git_toolbox_blame.xml new file mode 100644 index 0000000..7dc1249 --- /dev/null +++ b/.idea/git_toolbox_blame.xml @@ -0,0 +1,6 @@ + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..8d66637 --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff 
--git a/.idea/jarRepositories.xml b/.idea/jarRepositories.xml new file mode 100644 index 0000000..e6b00eb --- /dev/null +++ b/.idea/jarRepositories.xml @@ -0,0 +1,20 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..5d6b03c --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,15 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/README.md b/README.md index 9445140..7f7fbf4 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,35 @@ # fantaibao-crawler-desktop -爬虫桌面 \ No newline at end of file +饭太煲爬虫桌面程序 + +## 项目介绍 + +这是一个基于Spring Boot和JavaFX的Windows桌面应用程序,集成了Playwright爬虫功能。 + +## 功能特性 + +1. 桌面应用程序界面 +2. Playwright网页爬虫功能 +3. Cookie拦截和管理 +4. 网站登录模拟 + +## 技术栈 + +- Spring Boot 3.4 +- JavaFX 17 +- Playwright 1.40 +- Maven 3.x + +## 运行方式 + +```bash +mvn clean spring-boot:run +``` + +## 打包 + +```bash +mvn clean package +``` + +运行打包后的程序需要Java 17+环境。 \ No newline at end of file diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..6439cb7 --- /dev/null +++ b/pom.xml @@ -0,0 +1,142 @@ + + + 4.0.0 + com.ftb + fantaibao-crawler-desktop + 0.0.1-SNAPSHOT + fantaibao-crawler-desktop + fantaibao-crawler-desktop + + + 17 + UTF-8 + UTF-8 + 3.4.0 + 3.8.1 + 1.40.0 + 17.0.2 + 0.0.8 + + + + + + org.springframework.boot + spring-boot-starter + + + + org.springframework.boot + spring-boot-starter-logging + + + + org.projectlombok + lombok + + + + + org.slf4j + slf4j-api + + + + org.seleniumhq.selenium + selenium-java + + + error_prone_annotations + com.google.errorprone + + + + + + com.alibaba + fastjson + 1.2.83 + + + + org.apache.httpcomponents.client5 + httpclient5 + + + + cn.hutool + hutool-all + 5.8.22 + + + + com.microsoft.playwright + playwright + ${playwright.version} + + + + + org.openjfx + javafx-controls + 
${javafx.version} + + + + org.openjfx + javafx-fxml + ${javafx.version} + + + + + + + org.springframework.boot + spring-boot-dependencies + ${spring-boot.version} + pom + import + + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + ${maven-compiler-plugin.version} + + ${java.version} + + -parameters + + + + org.projectlombok + lombok + 1.18.30 + + + + + + + + org.openjfx + javafx-maven-plugin + ${javafx.maven.plugin.version} + + com.fantaibao.JavaFxApplication + app + fxapp + fxapp-zip + + + + + + \ No newline at end of file
diff --git a/src/main/java/com/fantaibao/DesktopApplication.java b/src/main/java/com/fantaibao/DesktopApplication.java
new file mode 100644
index 0000000..4caa5ba
--- /dev/null
+++ b/src/main/java/com/fantaibao/DesktopApplication.java
@@ -0,0 +1,95 @@
+package com.fantaibao;
+
+import com.fantaibao.service.PlaywrightService;
+import javafx.application.Application;
+import javafx.application.Platform;
+import javafx.geometry.Insets;
+import javafx.scene.Scene;
+import javafx.scene.control.Button;
+import javafx.scene.control.TextArea;
+import javafx.scene.layout.VBox;
+import javafx.stage.Stage;
+import org.springframework.boot.SpringApplication;
+import org.springframework.boot.autoconfigure.SpringBootApplication;
+import org.springframework.context.ConfigurableApplicationContext;
+
+/**
+ * JavaFX entry point of the desktop crawler.
+ *
+ * <p>Shows a window with two buttons and a read-only text area. Each button runs one of the
+ * {@link PlaywrightService} examples on a daemon thread; the messages those examples emit are
+ * appended to the text area on the JavaFX application thread.
+ *
+ * <p>The Spring context is started exactly once, in {@link #init()}, and closed when the window
+ * is closed or the application stops.
+ */
+//@SpringBootApplication
+public class DesktopApplication extends Application {
+
+    private TextArea textArea;
+    private ConfigurableApplicationContext applicationContext;
+    private PlaywrightService playwrightService;
+
+    /**
+     * Launches the JavaFX runtime.
+     *
+     * <p>Spring is intentionally NOT started here: {@link #init()} starts it, and starting it in
+     * both places would create two application contexts.
+     *
+     * @param args command-line arguments, forwarded to JavaFX (and from there to Spring)
+     */
+    public static void main(String[] args) {
+        // Set the JavaFX platform system property.
+        // NOTE(review): "javafx.application.platform" does not look like a documented JavaFX
+        // property - confirm it is actually needed.
+        System.setProperty("javafx.application.platform", "egl");
+        launch(args);
+    }
+
+    /**
+     * Starts the Spring context with the raw command-line arguments and creates the service.
+     */
+    @Override
+    public void init() throws Exception {
+        super.init();
+        String[] rawArgs = getParameters().getRaw().toArray(new String[0]);
+        applicationContext = SpringApplication.run(DesktopApplication.class, rawArgs);
+        // The service is instantiated directly rather than looked up as a bean, because
+        // @SpringBootApplication (and with it component scanning) is commented out above.
+        playwrightService = new PlaywrightService();
+    }
+
+    /**
+     * Builds the main window: two example buttons above the output text area.
+     *
+     * @param primaryStage the stage supplied by the JavaFX runtime
+     */
+    @Override
+    public void start(Stage primaryStage) {
+        primaryStage.setTitle("饭太煲爬虫桌面程序");
+
+        textArea = new TextArea();
+        textArea.setPrefRowCount(20);
+        textArea.setPrefColumnCount(50);
+        textArea.setEditable(false);
+
+        Button playwrightButton = new Button("运行Cookie拦截示例");
+        playwrightButton.setOnAction(e -> runPlaywrightCookieExample());
+
+        Button loginButton = new Button("运行登录示例");
+        loginButton.setOnAction(e -> runLoginExample());
+
+        VBox vBox = new VBox(10, playwrightButton, loginButton, textArea);
+        vBox.setPadding(new Insets(10));
+
+        Scene scene = new Scene(vBox, 800, 600);
+        primaryStage.setScene(scene);
+        primaryStage.setOnCloseRequest(e -> {
+            closeSpringContext();
+            Platform.exit();
+        });
+        primaryStage.show();
+    }
+
+    /** Runs the cookie-interception example in the background. */
+    private void runPlaywrightCookieExample() {
+        appendText("开始运行Playwright Cookie拦截示例...\n");
+        runInBackground(() -> playwrightService.interceptCookieExample(this::appendText));
+    }
+
+    /** Runs the manual-login example in the background. */
+    private void runLoginExample() {
+        appendText("开始运行登录示例...\n");
+        runInBackground(() -> playwrightService.loginExample(this::appendText));
+    }
+
+    /** Starts {@code task} on a daemon thread so that it never keeps the JVM alive on exit. */
+    private void runInBackground(Runnable task) {
+        Thread thread = new Thread(task);
+        thread.setDaemon(true);
+        thread.start();
+    }
+
+    /** Appends {@code text} plus a newline to the output area on the JavaFX application thread. */
+    private void appendText(String text) {
+        Platform.runLater(() -> textArea.appendText(text + "\n"));
+    }
+
+    /** Closes the Spring context if it was created. */
+    private void closeSpringContext() {
+        if (applicationContext != null) {
+            applicationContext.close();
+        }
+    }
+
+    @Override
+    public void stop() {
+        closeSpringContext();
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/fantaibao/PlaywrightInterceptCookieExample.java b/src/main/java/com/fantaibao/PlaywrightInterceptCookieExample.java
new file mode 100644
index 0000000..324d9a8
--- /dev/null
+++ b/src/main/java/com/fantaibao/PlaywrightInterceptCookieExample.java
@@ -0,0 +1,68 @@
+package com.fantaibao;
+
+import com.microsoft.playwright.*;
+import
com.microsoft.playwright.options.Cookie;
+
+import java.util.List;
+import java.util.Scanner;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+
+/**
+ * Console demo: opens a visible Chromium window, records the {@code cookie} header of every
+ * request made in the browser context, waits for the user to press Enter, then prints the
+ * context's cookies followed by the recorded request cookies.
+ */
+public class PlaywrightInterceptCookieExample {
+    public static void main(String[] args) {
+        // Both resources are closed automatically, the browser first and then Playwright.
+        try (Playwright playwright = Playwright.create();
+             Browser browser = playwright.chromium().launch(
+                     new BrowserType.LaunchOptions().setHeadless(false))) {
+
+            // Create the browser context.
+            BrowserContext context = browser.newContext();
+
+            // Request URL -> value of its "cookie" header (last value wins for a repeated URL).
+            ConcurrentMap<String, String> interceptedCookies = new ConcurrentHashMap<>();
+
+            // Listen to every network request of the context.
+            context.onRequest(request -> {
+                // Print the request URL.
+                System.out.println("请求URL: " + request.url());
+
+                // Read the Cookie request header, if any.
+                String cookieHeader = request.headerValue("cookie");
+                if (cookieHeader != null && !cookieHeader.isEmpty()) {
+                    System.out.println("请求中的Cookie: " + cookieHeader);
+                    interceptedCookies.put(request.url(), cookieHeader);
+                }
+            });
+
+            Page page = context.newPage();
+
+            // Navigate to the sample page (Baidu is used as the example).
+            page.navigate("https://www.baidu.com");
+
+            System.out.println("页面加载完成,请执行点击操作...");
+            System.out.println("点击按钮后按回车键继续获取Cookie...");
+
+            // Wait for the user to interact with the page and press Enter.
+            // The Scanner is deliberately not closed: closing it would close System.in.
+            Scanner scanner = new Scanner(System.in);
+            scanner.nextLine();
+
+            // Print all cookies currently held by the context.
+            List<Cookie> cookies = context.cookies();
+            System.out.println("\n当前页面的Cookie:");
+            for (Cookie cookie : cookies) {
+                System.out.println(cookie.name + "=" + cookie.value
+                        + " (域名: " + cookie.domain + ", 路径: " + cookie.path + ")");
+            }
+
+            // Print the cookies captured from request headers.
+            System.out.println("\n拦截到的请求Cookie:");
+            interceptedCookies.forEach((url, cookie) ->
+                    System.out.println("URL: " + url + "\nCookie: " + cookie + "\n"));
+
+            // Keep the browser open for a while so the result can be inspected.
+            System.out.println("浏览器将在10秒后关闭...");
+            page.waitForTimeout(10000);
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/fantaibao/Test.java b/src/main/java/com/fantaibao/Test.java
new file mode 100644
index 0000000..16f304f
--- /dev/null
+++
b/src/main/java/com/fantaibao/Test.java
@@ -0,0 +1,55 @@
+package com.fantaibao;
+
+import com.microsoft.playwright.*;
+import com.microsoft.playwright.options.Cookie;
+import com.microsoft.playwright.options.WaitForSelectorState;
+
+import java.util.List;
+import java.util.Scanner;
+
+/**
+ * Console demo of the manual-login flow: opens the login page in a visible Chromium window,
+ * prints every request URL and its {@code cookie} header, waits for the user to log in and
+ * press Enter, then prints the cookies held by the browser context.
+ */
+public class Test {
+    public static void main(String[] args) {
+        // Launch a visible browser; both resources are closed automatically.
+        try (Playwright playwright = Playwright.create();
+             Browser browser = playwright.chromium().launch(new BrowserType.LaunchOptions().setHeadless(false))) {
+
+            BrowserContext context = browser.newContext();
+            // Listen to every network request of the context.
+            context.onRequest(request -> {
+                // Print the request URL.
+                System.out.println("请求URL: " + request.url());
+                // Read the Cookie request header, if any.
+                String cookieHeader = request.headerValue("cookie");
+                if (cookieHeader != null && !cookieHeader.isEmpty()) {
+                    System.out.println("请求中的Cookie: " + cookieHeader);
+                }
+            });
+
+            Page page = context.newPage();
+
+            // Navigate to the login page.
+            page.navigate("https://ecom.meituan.com/bizaccount/login.html?loginByPhoneNumber=true&isProduction=true&epassportParams=%3Fbg_source%3D1%26service%3Dcom.sankuai.meishi.fe.ecom%26part_type%3D0%26feconfig%3Dbssoify%26biz_line%3D1%26continue%3Dhttps%253A%252F%252Fecom.meituan.com%252Fbizaccount%252Fbiz-choice.html%253Fredirect_uri%253Dhttps%25253A%25252F%25252Fecom.meituan.com%25252Fmeishi%25252F%2526_t%253D1759399140148%2526target%253Dhttps%25253A%25252F%25252Fecom.meituan.com%25252Fmeishi%25252F%26leftBottomLink%3D%26signUpTarget%3Dself");
+
+            // Wait for the user to log in manually.
+            // The Scanner is deliberately not closed: closing it would close System.in.
+            System.out.println("请在浏览器中手动登录...");
+            System.out.println("登录完成后按回车键继续...");
+            Scanner scanner = new Scanner(System.in);
+            scanner.nextLine();
+
+            // A wait/click on ".getStarted_Sjon" used to sit here. It appears to be a leftover
+            // from the playwright.dev navigation that was commented out; if the selector is
+            // absent from the login page the wait times out and throws before any cookie is
+            // printed, so it was removed.
+
+            // Print the cookies held by the context after login.
+            List<Cookie> cookies = context.cookies();
+            for (Cookie cookie : cookies) {
+                System.out.println(cookie.name + "=" + cookie.value);
+            }
+            // Keep the browser open for a while so the result can be inspected.
+            System.out.println("浏览器将在10秒后关闭...");
+            page.waitForTimeout(10000);
+        }
+    }
+}
diff --git a/src/main/java/com/fantaibao/service/PlaywrightService.java b/src/main/java/com/fantaibao/service/PlaywrightService.java
new file mode 100644
index 0000000..2fc9e13
--- /dev/null
+++ b/src/main/java/com/fantaibao/service/PlaywrightService.java
@@ -0,0 +1,143 @@
+package com.fantaibao.service;
+
+import com.microsoft.playwright.*;
+import com.microsoft.playwright.options.Cookie;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.stereotype.Service;
+
+import java.util.List;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.function.Consumer;
+
+/**
+ * Playwright demos used by the desktop UI.
+ *
+ * <p>Each example opens a visible Chromium window, reports every request of the browser context
+ * (URL and {@code cookie} header), waits a fixed time for the user to act, and then reports the
+ * cookies held by the context. Every message is sent both to the supplied consumer and to the
+ * SLF4J log. Any exception is caught, reported through the consumer and logged.
+ */
+@Service
+@Slf4j
+public class PlaywrightService {
+
+    /** Login page opened by {@link #loginExample(Consumer)}. */
+    private static final String LOGIN_URL = "https://ecom.meituan.com/bizaccount/login.html?loginByPhoneNumber=true&isProduction=true&epassportParams=%3Fbg_source%3D1%26service%3Dcom.sankuai.meishi.fe.ecom%26part_type%3D0%26feconfig%3Dbssoify%26biz_line%3D1%26continue%3Dhttps%253A%252F%252Fecom.meituan.com%252Fbizaccount%252Fbiz-choice.html%253Fredirect_uri%253Dhttps%25253A%25252F%25252Fecom.meituan.com%25252Fmeishi%25252F%2526_t%253D1759399140148%2526target%253Dhttps%25253A%25252F%25252Fecom.meituan.com%25252Fmeishi%25252F%26leftBottomLink%3D%26signUpTarget%3Dself";
+
+    /**
+     * Opens a sample page, records the {@code cookie} header of every request, waits 10 seconds
+     * for the user to interact with the page, then reports the context cookies and the recorded
+     * request cookies.
+     *
+     * <p>If the waiting thread is interrupted, the interrupt flag is restored and the example
+     * stops without reporting cookies.
+     *
+     * @param logger receives each progress message; must not be {@code null}
+     */
+    public void interceptCookieExample(Consumer<String> logger) {
+        // Both resources are closed automatically, the browser first and then Playwright.
+        try (Playwright playwright = Playwright.create();
+             Browser browser = playwright.chromium().launch(
+                     new BrowserType.LaunchOptions().setHeadless(false))) {
+
+            // Create the browser context.
+            BrowserContext context = browser.newContext();
+
+            // Request URL -> value of its "cookie" header (last value wins for a repeated URL).
+            ConcurrentMap<String, String> interceptedCookies = new ConcurrentHashMap<>();
+
+            // Listen to every network request and remember the ones that carry cookies.
+            context.onRequest(request -> {
+                String cookieHeader = reportRequest(request, logger);
+                if (cookieHeader != null) {
+                    interceptedCookies.put(request.url(), cookieHeader);
+                }
+            });
+
+            Page page = context.newPage();
+
+            // Navigate to the sample page (Baidu is used as the example).
+            page.navigate("https://www.baidu.com");
+
+            logger.accept("页面加载完成,请执行点击操作...");
+            logger.accept("点击按钮后按回车键继续获取Cookie...");
+
+            // Fixed 10 s wait standing in for the user's interaction; a real desktop flow
+            // should be driven by a UI event instead.
+            if (!pause(10000, "等待过程中被中断")) {
+                return;
+            }
+
+            // Report all cookies currently held by the context.
+            List<Cookie> cookies = context.cookies();
+            logger.accept("\n当前页面的Cookie:");
+            for (Cookie cookie : cookies) {
+                String cookieInfo = cookie.name + "=" + cookie.value
+                        + " (域名: " + cookie.domain + ", 路径: " + cookie.path + ")";
+                logger.accept(cookieInfo);
+                log.info("Cookie: {}", cookieInfo);
+            }
+
+            // Report the cookies captured from request headers.
+            logger.accept("\n拦截到的请求Cookie:");
+            interceptedCookies.forEach((url, cookie) -> {
+                String info = "URL: " + url + "\nCookie: " + cookie + "\n";
+                logger.accept(info);
+                log.info("拦截Cookie - {}: {}", url, cookie);
+            });
+
+            // Keep the browser open for a while so the result can be inspected.
+            logger.accept("浏览器将在5秒后关闭...");
+            page.waitForTimeout(5000);
+        } catch (Exception e) {
+            String errorMsg = "执行过程中出现错误: " + e.getMessage();
+            logger.accept(errorMsg);
+            log.error("Playwright执行出错", e);
+        }
+    }
+
+    /**
+     * Opens the login page, reports every request, waits 15 seconds for the user to log in
+     * manually, then reports the cookies held by the browser context.
+     *
+     * <p>If the waiting thread is interrupted, the interrupt flag is restored and the example
+     * stops without reporting cookies.
+     *
+     * @param logger receives each progress message; must not be {@code null}
+     */
+    public void loginExample(Consumer<String> logger) {
+        try (Playwright playwright = Playwright.create();
+             Browser browser = playwright.chromium().launch(new BrowserType.LaunchOptions().setHeadless(false))) {
+
+            BrowserContext context = browser.newContext();
+            // Listen to every network request of the context.
+            context.onRequest(request -> reportRequest(request, logger));
+
+            Page page = context.newPage();
+
+            // Navigate to the login page.
+            page.navigate(LOGIN_URL);
+
+            logger.accept("请在浏览器中手动登录...");
+            logger.accept("登录完成后程序将继续执行...");
+
+            // Fixed 15 s wait for the manual login; a real application should continue on a
+            // UI event instead.
+            if (!pause(15000, "等待登录过程中被中断")) {
+                return;
+            }
+
+            // Report the cookies held by the context after login.
+            List<Cookie> cookies = context.cookies();
+            logger.accept("登录后的Cookie信息:");
+            for (Cookie cookie : cookies) {
+                String cookieInfo = cookie.name + "=" + cookie.value;
+                logger.accept(cookieInfo);
+                log.info("登录Cookie: {}", cookieInfo);
+            }
+
+            logger.accept("浏览器将在5秒后关闭...");
+            page.waitForTimeout(5000);
+        } catch (Exception e) {
+            String errorMsg = "执行过程中出现错误: " + e.getMessage();
+            logger.accept(errorMsg);
+            log.error("Playwright登录示例执行出错", e);
+        }
+    }
+
+    /**
+     * Reports a request's URL and, when present and non-empty, its {@code cookie} header.
+     *
+     * @return the cookie header value, or {@code null} when the request carries none
+     */
+    private String reportRequest(Request request, Consumer<String> logger) {
+        String logMessage = "请求URL: " + request.url();
+        logger.accept(logMessage);
+        log.info(logMessage);
+
+        String cookieHeader = request.headerValue("cookie");
+        if (cookieHeader == null || cookieHeader.isEmpty()) {
+            return null;
+        }
+        String cookieMessage = "请求中的Cookie: " + cookieHeader;
+        logger.accept(cookieMessage);
+        log.info(cookieMessage);
+        return cookieHeader;
+    }
+
+    /**
+     * Sleeps for {@code millis} milliseconds.
+     *
+     * @return {@code true} if the full wait elapsed; {@code false} if the thread was interrupted,
+     *     in which case the interrupt flag is restored and {@code interruptedMessage} is logged
+     */
+    private boolean pause(long millis, String interruptedMessage) {
+        try {
+            Thread.sleep(millis);
+            return true;
+        } catch (InterruptedException e) {
+            // Restore the flag so whoever owns this thread can still see the interruption.
+            Thread.currentThread().interrupt();
+            log.warn(interruptedMessage);
+            return false;
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/module-info.java b/src/main/java/module-info.java
new file mode 100644
index 0000000..6846143
--- /dev/null
+++ b/src/main/java/module-info.java
@@ -0,0 +1,16 @@
+module fantaibao.crawler.desktop {
+    requires javafx.controls;
+    requires javafx.fxml;
+    requires spring.boot;
+    requires spring.boot.autoconfigure;
+    requires spring.context;
+    requires spring.beans;
+    requires playwright;
+    requires org.slf4j;
+    // Lombok is only an annotation processor, so it is required at compile time only.
+    requires static lombok;
+
+    opens com.fantaibao to spring.core, spring.beans, spring.context,
+            javafx.fxml, javafx.base, javafx.graphics, spring.boot, spring.boot.autoconfigure;
+
+    exports com.fantaibao;
+}
\ No newline at end of file
diff --git a/src/main/resources/application.yml
b/src/main/resources/application.yml new file mode 100644 index 0000000..d2a72f4 --- /dev/null +++ b/src/main/resources/application.yml @@ -0,0 +1,16 @@ +logging: + level: + com.fantaibao: INFO + org.springframework: INFO + pattern: + console: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n" + file: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n" + file: + name: logs/application.log + +spring: + main: + banner-mode: console + +server: + port: 8080 \ No newline at end of file