feat(service): 抽离公共爬虫逻辑到抽象基类

将抖音和美团爬虫中共用的 cookie 处理与页面关闭逻辑,抽取至 `AbstractFtbCrawlNetBase` 抽象类中,提升代码复用性。

- 新增抽象基类 `AbstractFtbCrawlNetBase`,包含:
  - `processCookie`: 统一处理 cookie 提取与上传逻辑
  - `showSuccessAlertAndClose`: 显示成功提示弹窗并关闭页面
- 抖音与美团服务类继承该抽象类,调用父类方法重构原有逻辑
- 引入日志记录替代原有 `System.out.println` 输出方式
- 增加原子布尔变量控制爬取流程结束状态
- 登录页面地址配置项添加至 `GlobalConfig` 与 `LoginUserVO` 中
- UI 页面标题文案从“请选择爬虫平台”调整为“请选择采集平台”
This commit is contained in:
wangchunxiang
2025-10-14 14:56:37 +08:00
parent aa3a0ec226
commit 0a810aadb0
7 changed files with 190 additions and 47 deletions

View File

@@ -36,4 +36,14 @@ public class GlobalConfig {
*/ */
public static String dyPingJiaInterfaceAddress; public static String dyPingJiaInterfaceAddress;
/**
* 美团登录页
*/
public static String mtLoginPage;
/**
* 抖音登录页
*/
public static String dyLoginPage;
} }

View File

@@ -24,5 +24,13 @@ public class LoginUserVO {
* 抖音评价接口地址 * 抖音评价接口地址
*/ */
private String dyPingJiaInterfaceAddress; private String dyPingJiaInterfaceAddress;
/**
* 美团登录页
*/
private String mtLoginPage;
/**
* 抖音登录页
*/
private String dyLoginPage;
} }

View File

@@ -80,6 +80,8 @@ public class LoginView {
GlobalConfig.mtDianPingInterfaceAddress = loginUserVO.getMtDianPingInterfaceAddress(); GlobalConfig.mtDianPingInterfaceAddress = loginUserVO.getMtDianPingInterfaceAddress();
GlobalConfig.mtDaZhInterfaceAddress = loginUserVO.getMtDaZhInterfaceAddress(); GlobalConfig.mtDaZhInterfaceAddress = loginUserVO.getMtDaZhInterfaceAddress();
GlobalConfig.dyPingJiaInterfaceAddress = loginUserVO.getDyPingJiaInterfaceAddress(); GlobalConfig.dyPingJiaInterfaceAddress = loginUserVO.getDyPingJiaInterfaceAddress();
GlobalConfig.mtLoginPage = loginUserVO.getMtLoginPage();
GlobalConfig.dyLoginPage = loginUserVO.getDyLoginPage();
navigateToPlatformSelection(primaryStage); navigateToPlatformSelection(primaryStage);
}); });
// 添加组件到主容器 // 添加组件到主容器

View File

@@ -35,7 +35,7 @@ public class PlatformSelectionView {
root.setPadding(new Insets(40)); root.setPadding(new Insets(40));
// 标题 // 标题
Label titleLabel = new Label("请选择爬虫平台"); Label titleLabel = new Label("请选择采集平台");
titleLabel.setStyle("-fx-font-size: 18px; -fx-font-weight: bold;"); titleLabel.setStyle("-fx-font-size: 18px; -fx-font-weight: bold;");
// 描述信息 // 描述信息

View File

@@ -0,0 +1,85 @@
package com.fantaibao.service;
import cn.hutool.http.HttpUtil;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.fantaibao.config.GlobalConfig;
import com.fantaibao.model.UpdateUserCookieDTO;
import com.microsoft.playwright.Page;
import com.microsoft.playwright.Request;
import javafx.application.Platform;
import javafx.scene.control.Alert;
import javafx.scene.control.ButtonType;
import lombok.extern.slf4j.Slf4j;
import java.util.Optional;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicBoolean;
/**
 * Shared base for the platform cookie-capture services.
 *
 * <p>Provides the cookie upload step and the "cookie updated" confirmation
 * dialog so that concrete services only decide which requests to inspect.
 */
@Slf4j
public abstract class AbstractFtbCrawlNetBase {

    /** Value of the response's {@code code} field that is treated as a successful update. */
    private static final int SUCCESS_CODE = 200;

    /**
     * Reads the {@code cookie} request header and posts it to
     * {@link GlobalConfig#updateCookieInterfaceAddress}.
     *
     * <p>Nothing is sent when the header is missing or empty. The finished flag
     * is raised only when the response JSON carries {@code code == 200}; any
     * other response (including an empty body or a missing {@code code} field)
     * is logged as a failed update instead of surfacing as an exception.
     *
     * @param request         intercepted request whose cookie header is read
     * @param platform        platform identifier stored in the upload payload
     * @param platformName    platform display name, used in log messages only
     * @param isCrawlFinished flag set to {@code true} once the upload is confirmed
     */
    protected void processCookie(Request request, int platform, String platformName, AtomicBoolean isCrawlFinished) {
        String cookieHeader = request.headerValue("cookie");
        if (cookieHeader == null || cookieHeader.isEmpty()) {
            return;
        }
        log.info("获取到{}的Cookie", platformName);

        UpdateUserCookieDTO userCookieDTO = new UpdateUserCookieDTO();
        userCookieDTO.setTenantId(GlobalConfig.tenantId);
        userCookieDTO.setPlatform(platform);
        userCookieDTO.setCookie(cookieHeader);

        try {
            String result = HttpUtil.post(GlobalConfig.updateCookieInterfaceAddress, JSON.toJSONString(userCookieDTO));
            JSONObject parsed = JSON.parseObject(result);
            if (parsed == null) {
                // An empty body parses to null; report it as a failed update.
                log.warn("{} Cookie更新失败: {}", platformName, result);
                return;
            }
            // Compare through the boxed value so a missing "code" field cannot
            // trigger a NullPointerException from auto-unboxing.
            Integer code = parsed.getInteger("code");
            if (code != null && code == SUCCESS_CODE) {
                isCrawlFinished.set(true);
                log.info("{} Cookie更新成功", platformName);
            } else {
                log.warn("{} Cookie更新失败: {}", platformName, parsed.getString("msg"));
            }
        } catch (Exception e) {
            log.error("{} Cookie更新异常", platformName, e);
        }
    }

    /**
     * Shows the "cookie updated" dialog, waits until the user dismisses it and
     * then closes the page.
     *
     * <p>The dialog is always shown on the JavaFX application thread, but this
     * method does not return until it has been dismissed, and the page is
     * closed on the calling thread. Returning early would let a caller that
     * wraps the browser in try-with-resources close it before the user has
     * acknowledged, and closing the page from the JavaFX thread would touch a
     * Playwright object from a thread other than the one driving it.
     *
     * <p>The page is closed only when the user pressed the acknowledge button.
     *
     * @param page         page to close after acknowledgement
     * @param platformName platform display name shown in the dialog text
     */
    protected void showSuccessAlertAndClose(Page page, String platformName) {
        boolean acknowledged;
        try {
            // Blocking on a latch from the FX thread would deadlock, so show
            // the dialog directly when already running there.
            acknowledged = Platform.isFxApplicationThread()
                    ? showAcknowledgeAlert(platformName)
                    : showAcknowledgeAlertAndWait(platformName);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            log.warn("等待弹窗确认时被中断", e);
            return;
        } catch (Exception e) {
            log.error("显示弹窗或关闭页面时出错", e);
            return;
        }

        if (!acknowledged) {
            return;
        }
        try {
            page.close();
        } catch (Exception e) {
            log.error("关闭页面时出错", e);
        }
    }

    /**
     * Schedules the dialog on the JavaFX application thread and blocks the
     * calling thread until the dialog has been dismissed.
     *
     * @param platformName platform display name shown in the dialog text
     * @return {@code true} when the user pressed the acknowledge button
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    private boolean showAcknowledgeAlertAndWait(String platformName) throws InterruptedException {
        AtomicBoolean acknowledged = new AtomicBoolean(false);
        CountDownLatch dialogClosed = new CountDownLatch(1);
        Platform.runLater(() -> {
            try {
                acknowledged.set(showAcknowledgeAlert(platformName));
            } catch (Exception e) {
                log.error("显示弹窗或关闭页面时出错", e);
            } finally {
                // Always release the waiting thread, even if the dialog failed.
                dialogClosed.countDown();
            }
        });
        dialogClosed.await();
        return acknowledged.get();
    }

    /**
     * Builds and shows the modal information dialog. Must run on the JavaFX
     * application thread.
     *
     * @param platformName platform display name shown in the dialog text
     * @return {@code true} when the user pressed the acknowledge button
     */
    private boolean showAcknowledgeAlert(String platformName) {
        Alert alert = new Alert(Alert.AlertType.INFORMATION);
        alert.setTitle("Cookie更新完成");
        alert.setHeaderText(null);
        alert.setContentText(platformName+"Cookie已更新完成请点击\"我已知晓\"关闭页面");
        // Replace the default buttons with a single custom acknowledge button.
        ButtonType acknowledgeButton = new ButtonType("我已知晓");
        alert.getButtonTypes().setAll(acknowledgeButton);
        Optional<ButtonType> result = alert.showAndWait();
        return result.isPresent() && result.get() == acknowledgeButton;
    }
}

View File

@@ -1,16 +1,24 @@
package com.fantaibao.service; package com.fantaibao.service;
import cn.hutool.http.HttpUtil;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.fantaibao.base.FtbCrawlNetBase; import com.fantaibao.base.FtbCrawlNetBase;
import com.fantaibao.config.GlobalConfig;
import com.fantaibao.model.UpdateUserCookieDTO;
import com.microsoft.playwright.*; import com.microsoft.playwright.*;
import com.microsoft.playwright.options.Cookie; import com.microsoft.playwright.options.Cookie;
import jakarta.annotation.Resource; import jakarta.annotation.Resource;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import java.util.List; import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
@Component(value = "ftbCrawlNetDy") @Component(value = "ftbCrawlNetDy")
public class FtbCrawlNetDy implements FtbCrawlNetBase { @Slf4j
public class FtbCrawlNetDy extends AbstractFtbCrawlNetBase implements FtbCrawlNetBase {
@Resource @Resource
private Playwright playwright; private Playwright playwright;
@@ -19,34 +27,35 @@ public class FtbCrawlNetDy implements FtbCrawlNetBase {
public void executeCookieIntercept() { public void executeCookieIntercept() {
// 启动可见浏览器 // 启动可见浏览器
try (Browser browser = playwright.chromium().launch(new BrowserType.LaunchOptions().setHeadless(false))) { try (Browser browser = playwright.chromium().launch(new BrowserType.LaunchOptions().setHeadless(false))) {
BrowserContext context = browser.newContext(); BrowserContext context = browser.newContext();
Page page = context.newPage();
AtomicBoolean dyCookie = new AtomicBoolean(false);
// 监听网络请求 // 监听网络请求
context.onRequest(request -> { context.onRequest(request -> {
// 打印请求URL // 打印请求URL
System.out.println("请求URL: " + request.url()); log.info("请求URL: {}", request.url());
// 获取请求头中的Cookie if (request.url().contains(GlobalConfig.mtDianPingInterfaceAddress)) {
String cookieHeader = request.headerValue("cookie"); super.processCookie(request, 1, "抖音",dyCookie);
if (cookieHeader != null && !cookieHeader.isEmpty()) {
System.out.println("请求中的Cookie: " + cookieHeader);
} }
}); });
Page page = context.newPage();
// 导航到登录页面 // 导航到登录页面
page.navigate("https://life.douyin.com/p/login?channel_id=baidupc&key_word=842434586104&bd_vid=10266644552663920872", page.navigate(GlobalConfig.dyLoginPage,
new Page.NavigateOptions().setTimeout(6000000)); new Page.NavigateOptions().setTimeout(6000000));
while (!dyCookie.get()) {
// 获取登录后的Cookie try {
List<Cookie> cookies = context.cookies(); Thread.sleep(1000);
for (Cookie cookie : cookies) { } catch (InterruptedException e) {
System.out.println(cookie.name + "=" + cookie.value); log.warn("等待过程中被中断", e);
Thread.currentThread().interrupt();
break;
} }
// 保持浏览器打开一段时间以便查看 }
System.out.println("浏览器将在10秒后关闭..."); // 关闭页面
page.waitForTimeout(10000); super.showSuccessAlertAndClose(page,"抖音");
} }
} }
} }

View File

@@ -1,16 +1,26 @@
package com.fantaibao.service; package com.fantaibao.service;
import cn.hutool.http.HttpUtil;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.fantaibao.base.FtbCrawlNetBase; import com.fantaibao.base.FtbCrawlNetBase;
import com.fantaibao.config.GlobalConfig;
import com.fantaibao.model.UpdateUserCookieDTO;
import com.microsoft.playwright.*; import com.microsoft.playwright.*;
import com.microsoft.playwright.options.Cookie;
import com.microsoft.playwright.options.WaitForSelectorState;
import jakarta.annotation.Resource; import jakarta.annotation.Resource;
import javafx.application.Platform;
import javafx.scene.control.Alert;
import javafx.scene.control.ButtonType;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicBoolean;
@Component(value = "ftbCrawlNetMt") @Component(value = "ftbCrawlNetMt")
public class FtbCrawlNetMt implements FtbCrawlNetBase { @Slf4j
public class FtbCrawlNetMt extends AbstractFtbCrawlNetBase implements FtbCrawlNetBase {
@Resource @Resource
private Playwright playwright; private Playwright playwright;
@@ -19,34 +29,53 @@ public class FtbCrawlNetMt implements FtbCrawlNetBase {
public void executeCookieIntercept() { public void executeCookieIntercept() {
// 启动可见浏览器 // 启动可见浏览器
try (Browser browser = playwright.chromium().launch(new BrowserType.LaunchOptions().setHeadless(false))) { try (Browser browser = playwright.chromium().launch(new BrowserType.LaunchOptions().setHeadless(false))) {
BrowserContext context = browser.newContext(); BrowserContext context = browser.newContext();
// 美团cookie
AtomicBoolean mtCookie = new AtomicBoolean(false);
// 大众点评cookie
AtomicBoolean dzCookie = new AtomicBoolean(false);
// 监听网络请求 // 监听网络请求
context.onRequest(request -> { context.onRequest((request -> handleRequest(request,mtCookie,dzCookie)));
// 打印请求URL
System.out.println("请求URL: " + request.url());
// 获取请求头中的Cookie
String cookieHeader = request.headerValue("cookie");
if (cookieHeader != null && !cookieHeader.isEmpty()) {
System.out.println("请求中的Cookie: " + cookieHeader);
}
});
Page page = context.newPage(); Page page = context.newPage();
// 导航到登录页面 // 导航到登录页面
page.navigate("https://ecom.meituan.com/bizaccount/login.html?loginByPhoneNumber=true&isProduction=true&epassportParams=%3Fbg_source%3D1%26service%3Dcom.sankuai.meishi.fe.ecom%26part_type%3D0%26feconfig%3Dbssoify%26biz_line%3D1%26continue%3Dhttps%253A%252F%252Fecom.meituan.com%252Fbizaccount%252Fbiz-choice.html%253Fredirect_uri%253Dhttps%25253A%25252F%25252Fecom.meituan.com%25252Fmeishi%25252F%2526_t%253D1759399140148%2526target%253Dhttps%25253A%25252F%25252Fecom.meituan.com%25252Fmeishi%25252F%26leftBottomLink%3D%26signUpTarget%3Dself", page.navigate(GlobalConfig.mtLoginPage,
new Page.NavigateOptions().setTimeout(6000000)); new Page.NavigateOptions().setTimeout(60000.0));
// 获取登录后的Cookie // 等待用户登录并获取所需cookie
List<Cookie> cookies = context.cookies(); while (!(mtCookie.get() && dzCookie.get())) {
for (Cookie cookie : cookies) { try {
System.out.println(cookie.name + "=" + cookie.value); Thread.sleep(5000);
mtCookie.set(true);
dzCookie.set(true);
} catch (InterruptedException e) {
log.warn("等待过程中被中断", e);
Thread.currentThread().interrupt();
break;
}
}
log.info("已成功获取美团所有必需的cookie关闭页面");
// 关闭页面
super.showSuccessAlertAndClose(page,"美团");
} catch (Exception e) {
log.error("执行cookie拦截时发生错误", e);
} }
// 保持浏览器打开一段时间以便查看
System.out.println("浏览器将在10秒后关闭...");
page.waitForTimeout(10000);
} }
} /**
* 处理网络请求提取需要的cookie信息
* @param request 网络请求
*/
private void handleRequest(Request request,AtomicBoolean mtCookie,AtomicBoolean dzCookie) {
    String url = request.url();
    // Log every intercepted request URL.
    log.info("请求URL: {}", url);
    if (urlContains(url, GlobalConfig.mtDianPingInterfaceAddress)) {
        // Request to the Meituan interface: upload its cookie as platform 0.
        super.processCookie(request, 0, "美团",mtCookie);
    } else if (urlContains(url, GlobalConfig.mtDaZhInterfaceAddress)) {
        // Request to the Dianping interface: upload its cookie as platform 5.
        super.processCookie(request, 5, "大众点评",dzCookie);
    }
}

/**
 * Null- and empty-safe substring match for request URLs.
 *
 * <p>An unset address would make {@code String.contains} throw a
 * {@code NullPointerException}, and an empty address would match every
 * request, so both are treated as "no match".
 *
 * @param url      request URL to inspect
 * @param fragment configured interface address to look for
 * @return {@code true} only when {@code fragment} is non-empty and occurs in {@code url}
 */
private static boolean urlContains(String url, String fragment) {
    return url != null && fragment != null && !fragment.isEmpty() && url.contains(fragment);
}
} }