feat(playwright): 增强浏览器反自动化检测能力

为 FtbCrawlNetDy 和 FtbCrawlNetMt 服务添加了反自动化检测配置,包括启动参数、HTTP头设置以及初始化脚本。同时扩展 PlaywrightManager
功能,支持创建隐身模式上下文选项和重新创建实例,提高爬虫稳定性与隐蔽性。
This commit is contained in:
wangchunxiang
2025-10-16 20:42:04 +08:00
parent 1858a18532
commit 481e110566
3 changed files with 150 additions and 10 deletions

View File

@@ -57,7 +57,11 @@ public class PlaywrightManager {
public void close() {
if (playwright != null) {
try {
log.info("开始关闭Playwright实例");
long startTime = System.currentTimeMillis();
playwright.close();
long endTime = System.currentTimeMillis();
log.info("Playwright实例关闭完成耗时: {}ms", (endTime - startTime));
} catch (Exception e) {
log.error("关闭Playwright实例时出错", e);
} finally {
@@ -67,4 +71,30 @@ public class PlaywrightManager {
}
}
/**
 * Discards the current Playwright instance and creates a fresh one.
 *
 * <p>Calls {@link #close()} first, then stores the result of
 * {@code Playwright.create()} in the {@code playwright} field.
 *
 * @throws RuntimeException if creating the new instance fails; the original
 *         exception is preserved as the cause
 */
public void recreate() {
    close();
    // Same text is used for the log entry and for the rethrown exception.
    final String failureMessage = "重新创建Playwright实例失败";
    try {
        playwright = Playwright.create();
    } catch (Exception cause) {
        log.error(failureMessage, cause);
        throw new RuntimeException(failureMessage, cause);
    }
}
/**
 * Builds the browser context options shared by the crawlers.
 *
 * <p>Only the viewport size (1680x900) is configured here. No user-agent
 * override is applied; callers may set one on the returned options if needed.
 *
 * @return a new options object with the viewport set to 1680x900
 */
public com.microsoft.playwright.Browser.NewContextOptions createStealthContextOptions() {
    final int viewportWidth = 1680;
    final int viewportHeight = 900;
    return new com.microsoft.playwright.Browser.NewContextOptions()
            .setViewportSize(viewportWidth, viewportHeight);
}
}

View File

@@ -8,6 +8,9 @@ import jakarta.annotation.Resource;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
@@ -19,11 +22,34 @@ public class FtbCrawlNetDy extends AbstractFtbCrawlNetBase implements FtbCrawlNe
public void executeCookieIntercept() {
// 启动可见浏览器
try (Browser browser = PlaywrightManager.getInstance().getPlaywright()
.chromium().launch(new BrowserType.LaunchOptions().setHeadless(false))) {
// Set the browser viewport size to 1680x900
Browser.NewContextOptions contextOptions = new Browser.NewContextOptions()
.setViewportSize(1680, 900);
.chromium().launch(getLaunchOptions())) {
// 设置浏览器上下文选项以减少自动化检测
Browser.NewContextOptions contextOptions = PlaywrightManager.getInstance().createStealthContextOptions();
// 添加额外的反检测配置
Map<String, String> extraHTTPHeaders = new HashMap<>();
extraHTTPHeaders.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8");
contextOptions.setExtraHTTPHeaders(extraHTTPHeaders);
BrowserContext context = browser.newContext(contextOptions);
// 添加反自动化检测脚本
context.addInitScript("""
() => {
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined,
});
// 添加一些常见的浏览器属性
Object.defineProperty(navigator, 'languages', {
get: () => ['zh-CN', 'zh', 'en'],
});
Object.defineProperty(navigator, 'plugins', {
get: () => [1, 2, 3, 4, 5],
});
}
""");
Page page = context.newPage();
AtomicBoolean dyCookie = new AtomicBoolean(false);
// 监听网络请求
@@ -54,6 +80,34 @@ public class FtbCrawlNetDy extends AbstractFtbCrawlNetBase implements FtbCrawlNe
}
/**
 * Builds the Chromium launch options used when opening the browser for
 * cookie interception.
 *
 * <p>The browser is launched headed ({@code setHeadless(false)}) with a fixed
 * list of Chromium command-line switches intended to reduce automation
 * detection.
 *
 * @return launch options with headless mode disabled and the switch list applied
 */
private BrowserType.LaunchOptions getLaunchOptions() {
    // Chromium command-line switches, passed through unchanged and in this order.
    String[] chromiumSwitches = {
        "--no-sandbox",
        "--disable-blink-features=AutomationControlled",
        "--disable-extensions",
        "--disable-infobars",
        "--disable-background-timer-throttling",
        "--disable-backgrounding-occluded-windows",
        "--disable-renderer-backgrounding",
        "--disable-ipc-flooding-protection",
        "--disable-background-networking",
        "--disable-default-apps",
        "--disable-features=TranslateUI",
        "--disable-hang-monitor",
        "--disable-prompt-on-repost",
        "--force-color-profile=srgb",
        "--metrics-recording-only",
        "--no-first-run",
        "--password-store=basic",
        "--use-mock-keychain"
    };
    return new BrowserType.LaunchOptions()
            .setArgs(Arrays.asList(chromiumSwitches))
            .setHeadless(false);
}
}

View File

@@ -8,6 +8,9 @@ import jakarta.annotation.Resource;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
@Component(value = "ftbCrawlNetMt")
@@ -18,11 +21,34 @@ public class FtbCrawlNetMt extends AbstractFtbCrawlNetBase implements FtbCrawlNe
public void executeCookieIntercept() {
// 启动可见浏览器
try (Browser browser = PlaywrightManager.getInstance().getPlaywright()
.chromium().launch(new BrowserType.LaunchOptions().setHeadless(false))) {
.chromium().launch(getLaunchOptions())) {
// Build the browser context options; the viewport is 1680x900
Browser.NewContextOptions contextOptions = new Browser.NewContextOptions()
.setViewportSize(1680, 900);
Browser.NewContextOptions contextOptions = PlaywrightManager.getInstance().createStealthContextOptions();
// 添加额外的反检测配置
Map<String, String> extraHTTPHeaders = new HashMap<>();
extraHTTPHeaders.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8");
contextOptions.setExtraHTTPHeaders(extraHTTPHeaders);
BrowserContext context = browser.newContext(contextOptions);
// 添加反自动化检测脚本
context.addInitScript("""
() => {
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined,
});
// 添加一些常见的浏览器属性
Object.defineProperty(navigator, 'languages', {
get: () => ['zh-CN', 'zh', 'en'],
});
Object.defineProperty(navigator, 'plugins', {
get: () => [1, 2, 3, 4, 5],
});
}
""");
// 美团cookie
AtomicBoolean mtCookie = new AtomicBoolean(false);
// 大众点评cookie
@@ -54,6 +80,36 @@ public class FtbCrawlNetMt extends AbstractFtbCrawlNetBase implements FtbCrawlNe
}
}
/**
 * Creates the launch options for the headed Chromium browser used by this
 * crawler.
 *
 * <p>Headless mode is turned off and a fixed set of Chromium command-line
 * flags, intended to reduce automation detection, is supplied as launch
 * arguments.
 *
 * @return launch options carrying the flag list, with headless mode disabled
 */
private BrowserType.LaunchOptions getLaunchOptions() {
    // Flags are forwarded to Chromium as-is, in the order listed.
    var antiDetectionFlags = Arrays.asList(
            "--no-sandbox",
            "--disable-blink-features=AutomationControlled",
            "--disable-extensions",
            "--disable-infobars",
            "--disable-background-timer-throttling",
            "--disable-backgrounding-occluded-windows",
            "--disable-renderer-backgrounding",
            "--disable-ipc-flooding-protection",
            "--disable-background-networking",
            "--disable-default-apps",
            "--disable-features=TranslateUI",
            "--disable-hang-monitor",
            "--disable-prompt-on-repost",
            "--force-color-profile=srgb",
            "--metrics-recording-only",
            "--no-first-run",
            "--password-store=basic",
            "--use-mock-keychain");
    return new BrowserType.LaunchOptions()
            .setHeadless(false)
            .setArgs(antiDetectionFlags);
}
/**
* 处理网络请求提取需要的cookie信息
* @param request 网络请求