feat(service): 抽离公共爬虫逻辑到抽象基类
将抖音和美团爬虫中共用的 cookie 处理与页面关闭逻辑抽取至 `AbstractFtbCrawlNetBase` 抽象类中,提升代码复用性。
- 新增抽象基类 `AbstractFtbCrawlNetBase`,包含:
  - `processCookie`:统一处理 cookie 提取与上传逻辑
  - `showSuccessAlertAndClose`:显示成功提示弹窗并关闭页面
- 抖音与美团服务类继承该抽象类,调用父类方法重构原有逻辑
- 引入日志记录替代原有 `System.out.println` 输出方式
- 增加原子布尔变量控制爬取流程结束状态
- 登录页面地址配置项添加至 `GlobalConfig` 与 `LoginUserVO` 中
- UI 页面标题文案从“请选择爬虫平台”调整为“请选择采集平台”
This commit is contained in:
@@ -36,4 +36,14 @@ public class GlobalConfig {
|
|||||||
*/
|
*/
|
||||||
public static String dyPingJiaInterfaceAddress;
|
public static String dyPingJiaInterfaceAddress;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 美团登录页
|
||||||
|
*/
|
||||||
|
public static String mtLoginPage;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 抖音登录页
|
||||||
|
*/
|
||||||
|
public static String dyLoginPage;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -24,5 +24,13 @@ public class LoginUserVO {
|
|||||||
* 抖音评价接口地址
|
* 抖音评价接口地址
|
||||||
*/
|
*/
|
||||||
private String dyPingJiaInterfaceAddress;
|
private String dyPingJiaInterfaceAddress;
|
||||||
|
/**
|
||||||
|
* 美团登录页
|
||||||
|
*/
|
||||||
|
private String mtLoginPage;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 抖音登录页
|
||||||
|
*/
|
||||||
|
private String dyLoginPage;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -80,6 +80,8 @@ public class LoginView {
|
|||||||
GlobalConfig.mtDianPingInterfaceAddress = loginUserVO.getMtDianPingInterfaceAddress();
|
GlobalConfig.mtDianPingInterfaceAddress = loginUserVO.getMtDianPingInterfaceAddress();
|
||||||
GlobalConfig.mtDaZhInterfaceAddress = loginUserVO.getMtDaZhInterfaceAddress();
|
GlobalConfig.mtDaZhInterfaceAddress = loginUserVO.getMtDaZhInterfaceAddress();
|
||||||
GlobalConfig.dyPingJiaInterfaceAddress = loginUserVO.getDyPingJiaInterfaceAddress();
|
GlobalConfig.dyPingJiaInterfaceAddress = loginUserVO.getDyPingJiaInterfaceAddress();
|
||||||
|
GlobalConfig.mtLoginPage = loginUserVO.getMtLoginPage();
|
||||||
|
GlobalConfig.dyLoginPage = loginUserVO.getDyLoginPage();
|
||||||
navigateToPlatformSelection(primaryStage);
|
navigateToPlatformSelection(primaryStage);
|
||||||
});
|
});
|
||||||
// 添加组件到主容器
|
// 添加组件到主容器
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ public class PlatformSelectionView {
|
|||||||
root.setPadding(new Insets(40));
|
root.setPadding(new Insets(40));
|
||||||
|
|
||||||
// 标题
|
// 标题
|
||||||
Label titleLabel = new Label("请选择爬虫平台");
|
Label titleLabel = new Label("请选择采集平台");
|
||||||
titleLabel.setStyle("-fx-font-size: 18px; -fx-font-weight: bold;");
|
titleLabel.setStyle("-fx-font-size: 18px; -fx-font-weight: bold;");
|
||||||
|
|
||||||
// 描述信息
|
// 描述信息
|
||||||
|
|||||||
@@ -0,0 +1,85 @@
|
|||||||
|
package com.fantaibao.service;
|
||||||
|
|
||||||
|
import cn.hutool.http.HttpUtil;
|
||||||
|
import com.alibaba.fastjson.JSON;
|
||||||
|
import com.alibaba.fastjson.JSONObject;
|
||||||
|
import com.fantaibao.config.GlobalConfig;
|
||||||
|
import com.fantaibao.model.UpdateUserCookieDTO;
|
||||||
|
import com.microsoft.playwright.Page;
|
||||||
|
import com.microsoft.playwright.Request;
|
||||||
|
import javafx.application.Platform;
|
||||||
|
import javafx.scene.control.Alert;
|
||||||
|
import javafx.scene.control.ButtonType;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
public abstract class AbstractFtbCrawlNetBase {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 处理cookie并发送到服务器
|
||||||
|
* @param request 请求对象
|
||||||
|
* @param platform 平台标识
|
||||||
|
* @param platformName 平台名称,用于日志记录
|
||||||
|
*/
|
||||||
|
protected void processCookie(Request request, int platform, String platformName, AtomicBoolean isCrawlFinished) {
|
||||||
|
// 获取请求头中的Cookie
|
||||||
|
String cookieHeader = request.headerValue("cookie");
|
||||||
|
if (cookieHeader != null && !cookieHeader.isEmpty()) {
|
||||||
|
log.info("获取到{}的Cookie", platformName);
|
||||||
|
UpdateUserCookieDTO userCookieDTO = new UpdateUserCookieDTO();
|
||||||
|
userCookieDTO.setTenantId(GlobalConfig.tenantId);
|
||||||
|
userCookieDTO.setPlatform(platform);
|
||||||
|
userCookieDTO.setCookie(cookieHeader);
|
||||||
|
try {
|
||||||
|
String result = HttpUtil.post(GlobalConfig.updateCookieInterfaceAddress, JSON.toJSONString(userCookieDTO));
|
||||||
|
JSONObject parsed = JSON.parseObject(result);
|
||||||
|
if (parsed.getInteger("code") == 200) {
|
||||||
|
isCrawlFinished.set(true);
|
||||||
|
log.info("{} Cookie更新成功", platformName);
|
||||||
|
} else {
|
||||||
|
log.warn("{} Cookie更新失败: {}", platformName, parsed.getString("msg"));
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("{} Cookie更新异常", platformName, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 显示成功弹窗并关闭页面
|
||||||
|
* @param page 当前页面
|
||||||
|
* @param platformName 平台名称,用于日志记录
|
||||||
|
*/
|
||||||
|
protected void showSuccessAlertAndClose(Page page, String platformName) {
|
||||||
|
try {
|
||||||
|
// 在JavaFX线程中显示弹窗
|
||||||
|
Platform.runLater(() -> {
|
||||||
|
Alert alert = new Alert(Alert.AlertType.INFORMATION);
|
||||||
|
alert.setTitle("Cookie更新完成");
|
||||||
|
alert.setHeaderText(null);
|
||||||
|
alert.setContentText(platformName+"Cookie已更新完成,请点击\"我已知晓\"关闭页面");
|
||||||
|
|
||||||
|
// 添加自定义按钮
|
||||||
|
ButtonType acknowledgeButton = new ButtonType("我已知晓");
|
||||||
|
alert.getButtonTypes().setAll(acknowledgeButton);
|
||||||
|
|
||||||
|
// 显示弹窗并等待用户响应
|
||||||
|
Optional<ButtonType> result = alert.showAndWait();
|
||||||
|
if (result.isPresent() && result.get() == acknowledgeButton) {
|
||||||
|
// 用户点击了"我已知晓"按钮,关闭页面
|
||||||
|
try {
|
||||||
|
page.close();
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("关闭页面时出错", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("显示弹窗或关闭页面时出错", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -1,16 +1,24 @@
|
|||||||
package com.fantaibao.service;
|
package com.fantaibao.service;
|
||||||
|
|
||||||
|
import cn.hutool.http.HttpUtil;
|
||||||
|
import com.alibaba.fastjson.JSON;
|
||||||
|
import com.alibaba.fastjson.JSONObject;
|
||||||
import com.fantaibao.base.FtbCrawlNetBase;
|
import com.fantaibao.base.FtbCrawlNetBase;
|
||||||
|
import com.fantaibao.config.GlobalConfig;
|
||||||
|
import com.fantaibao.model.UpdateUserCookieDTO;
|
||||||
import com.microsoft.playwright.*;
|
import com.microsoft.playwright.*;
|
||||||
import com.microsoft.playwright.options.Cookie;
|
import com.microsoft.playwright.options.Cookie;
|
||||||
import jakarta.annotation.Resource;
|
import jakarta.annotation.Resource;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
|
||||||
|
|
||||||
@Component(value = "ftbCrawlNetDy")
|
@Component(value = "ftbCrawlNetDy")
|
||||||
public class FtbCrawlNetDy implements FtbCrawlNetBase {
|
@Slf4j
|
||||||
|
public class FtbCrawlNetDy extends AbstractFtbCrawlNetBase implements FtbCrawlNetBase {
|
||||||
|
|
||||||
@Resource
|
@Resource
|
||||||
private Playwright playwright;
|
private Playwright playwright;
|
||||||
@@ -19,34 +27,35 @@ public class FtbCrawlNetDy implements FtbCrawlNetBase {
|
|||||||
public void executeCookieIntercept() {
|
public void executeCookieIntercept() {
|
||||||
// 启动可见浏览器
|
// 启动可见浏览器
|
||||||
try (Browser browser = playwright.chromium().launch(new BrowserType.LaunchOptions().setHeadless(false))) {
|
try (Browser browser = playwright.chromium().launch(new BrowserType.LaunchOptions().setHeadless(false))) {
|
||||||
|
|
||||||
BrowserContext context = browser.newContext();
|
BrowserContext context = browser.newContext();
|
||||||
|
Page page = context.newPage();
|
||||||
|
AtomicBoolean dyCookie = new AtomicBoolean(false);
|
||||||
// 监听网络请求
|
// 监听网络请求
|
||||||
context.onRequest(request -> {
|
context.onRequest(request -> {
|
||||||
// 打印请求URL
|
// 打印请求URL
|
||||||
System.out.println("请求URL: " + request.url());
|
log.info("请求URL: {}", request.url());
|
||||||
// 获取请求头中的Cookie
|
if (request.url().contains(GlobalConfig.mtDianPingInterfaceAddress)) {
|
||||||
String cookieHeader = request.headerValue("cookie");
|
super.processCookie(request, 1, "抖音",dyCookie);
|
||||||
if (cookieHeader != null && !cookieHeader.isEmpty()) {
|
|
||||||
System.out.println("请求中的Cookie: " + cookieHeader);
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
Page page = context.newPage();
|
|
||||||
|
|
||||||
// 导航到登录页面
|
// 导航到登录页面
|
||||||
page.navigate("https://life.douyin.com/p/login?channel_id=baidupc&key_word=842434586104&bd_vid=10266644552663920872",
|
page.navigate(GlobalConfig.dyLoginPage,
|
||||||
new Page.NavigateOptions().setTimeout(6000000));
|
new Page.NavigateOptions().setTimeout(6000000));
|
||||||
|
while (!dyCookie.get()) {
|
||||||
// 获取登录后的Cookie
|
try {
|
||||||
List<Cookie> cookies = context.cookies();
|
Thread.sleep(1000);
|
||||||
for (Cookie cookie : cookies) {
|
} catch (InterruptedException e) {
|
||||||
System.out.println(cookie.name + "=" + cookie.value);
|
log.warn("等待过程中被中断", e);
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// 保持浏览器打开一段时间以便查看
|
// 关闭页面
|
||||||
System.out.println("浏览器将在10秒后关闭...");
|
super.showSuccessAlertAndClose(page,"抖音");
|
||||||
page.waitForTimeout(10000);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,16 +1,26 @@
|
|||||||
package com.fantaibao.service;
|
package com.fantaibao.service;
|
||||||
|
|
||||||
|
import cn.hutool.http.HttpUtil;
|
||||||
|
import com.alibaba.fastjson.JSON;
|
||||||
|
import com.alibaba.fastjson.JSONObject;
|
||||||
import com.fantaibao.base.FtbCrawlNetBase;
|
import com.fantaibao.base.FtbCrawlNetBase;
|
||||||
|
import com.fantaibao.config.GlobalConfig;
|
||||||
|
import com.fantaibao.model.UpdateUserCookieDTO;
|
||||||
import com.microsoft.playwright.*;
|
import com.microsoft.playwright.*;
|
||||||
import com.microsoft.playwright.options.Cookie;
|
|
||||||
import com.microsoft.playwright.options.WaitForSelectorState;
|
|
||||||
import jakarta.annotation.Resource;
|
import jakarta.annotation.Resource;
|
||||||
|
import javafx.application.Platform;
|
||||||
|
import javafx.scene.control.Alert;
|
||||||
|
import javafx.scene.control.ButtonType;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
import java.util.List;
|
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
|
||||||
|
|
||||||
@Component(value = "ftbCrawlNetMt")
|
@Component(value = "ftbCrawlNetMt")
|
||||||
public class FtbCrawlNetMt implements FtbCrawlNetBase {
|
@Slf4j
|
||||||
|
public class FtbCrawlNetMt extends AbstractFtbCrawlNetBase implements FtbCrawlNetBase {
|
||||||
|
|
||||||
@Resource
|
@Resource
|
||||||
private Playwright playwright;
|
private Playwright playwright;
|
||||||
@@ -19,34 +29,53 @@ public class FtbCrawlNetMt implements FtbCrawlNetBase {
|
|||||||
public void executeCookieIntercept() {
|
public void executeCookieIntercept() {
|
||||||
// 启动可见浏览器
|
// 启动可见浏览器
|
||||||
try (Browser browser = playwright.chromium().launch(new BrowserType.LaunchOptions().setHeadless(false))) {
|
try (Browser browser = playwright.chromium().launch(new BrowserType.LaunchOptions().setHeadless(false))) {
|
||||||
|
|
||||||
BrowserContext context = browser.newContext();
|
BrowserContext context = browser.newContext();
|
||||||
|
// 美团cookie
|
||||||
|
AtomicBoolean mtCookie = new AtomicBoolean(false);
|
||||||
|
// 大众点评cookie
|
||||||
|
AtomicBoolean dzCookie = new AtomicBoolean(false);
|
||||||
|
|
||||||
// 监听网络请求
|
// 监听网络请求
|
||||||
context.onRequest(request -> {
|
context.onRequest((request -> handleRequest(request,mtCookie,dzCookie)));
|
||||||
// 打印请求URL
|
|
||||||
System.out.println("请求URL: " + request.url());
|
|
||||||
// 获取请求头中的Cookie
|
|
||||||
String cookieHeader = request.headerValue("cookie");
|
|
||||||
if (cookieHeader != null && !cookieHeader.isEmpty()) {
|
|
||||||
System.out.println("请求中的Cookie: " + cookieHeader);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
Page page = context.newPage();
|
Page page = context.newPage();
|
||||||
|
|
||||||
// 导航到登录页面
|
// 导航到登录页面
|
||||||
page.navigate("https://ecom.meituan.com/bizaccount/login.html?loginByPhoneNumber=true&isProduction=true&epassportParams=%3Fbg_source%3D1%26service%3Dcom.sankuai.meishi.fe.ecom%26part_type%3D0%26feconfig%3Dbssoify%26biz_line%3D1%26continue%3Dhttps%253A%252F%252Fecom.meituan.com%252Fbizaccount%252Fbiz-choice.html%253Fredirect_uri%253Dhttps%25253A%25252F%25252Fecom.meituan.com%25252Fmeishi%25252F%2526_t%253D1759399140148%2526target%253Dhttps%25253A%25252F%25252Fecom.meituan.com%25252Fmeishi%25252F%26leftBottomLink%3D%26signUpTarget%3Dself",
|
page.navigate(GlobalConfig.mtLoginPage,
|
||||||
new Page.NavigateOptions().setTimeout(6000000));
|
new Page.NavigateOptions().setTimeout(60000.0));
|
||||||
|
|
||||||
// 获取登录后的Cookie
|
// 等待用户登录并获取所需cookie
|
||||||
List<Cookie> cookies = context.cookies();
|
while (!(mtCookie.get() && dzCookie.get())) {
|
||||||
for (Cookie cookie : cookies) {
|
try {
|
||||||
System.out.println(cookie.name + "=" + cookie.value);
|
Thread.sleep(5000);
|
||||||
|
mtCookie.set(true);
|
||||||
|
dzCookie.set(true);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
log.warn("等待过程中被中断", e);
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// 保持浏览器打开一段时间以便查看
|
log.info("已成功获取美团所有必需的cookie,关闭页面");
|
||||||
System.out.println("浏览器将在10秒后关闭...");
|
// 关闭页面
|
||||||
page.waitForTimeout(10000);
|
super.showSuccessAlertAndClose(page,"美团");
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("执行cookie拦截时发生错误", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 处理网络请求,提取需要的cookie信息
|
||||||
|
* @param request 网络请求
|
||||||
|
*/
|
||||||
|
private void handleRequest(Request request,AtomicBoolean mtCookie,AtomicBoolean dzCookie) {
|
||||||
|
// 打印请求URL
|
||||||
|
log.info("请求URL: {}", request.url());
|
||||||
|
|
||||||
|
// 美团cookie
|
||||||
|
if (request.url().contains(GlobalConfig.mtDianPingInterfaceAddress)) {
|
||||||
|
super.processCookie(request, 0, "美团",mtCookie);
|
||||||
|
} else if (request.url().contains(GlobalConfig.mtDaZhInterfaceAddress)) {
|
||||||
|
super.processCookie(request, 5, "大众点评",dzCookie);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user