feat(playwright): 增强浏览器反自动化检测能力
为 FtbCrawlNetDy 和 FtbCrawlNetMt 服务添加了反自动化检测配置,包括启动参数、HTTP头设置以及初始化脚本。同时扩展 PlaywrightManager 功能,支持创建隐身模式上下文选项和重新创建实例,提高爬虫稳定性与隐蔽性。
This commit is contained in:
@@ -57,7 +57,11 @@ public class PlaywrightManager {
|
|||||||
public void close() {
|
public void close() {
|
||||||
if (playwright != null) {
|
if (playwright != null) {
|
||||||
try {
|
try {
|
||||||
|
log.info("开始关闭Playwright实例");
|
||||||
|
long startTime = System.currentTimeMillis();
|
||||||
playwright.close();
|
playwright.close();
|
||||||
|
long endTime = System.currentTimeMillis();
|
||||||
|
log.info("Playwright实例关闭完成,耗时: {}ms", (endTime - startTime));
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.error("关闭Playwright实例时出错", e);
|
log.error("关闭Playwright实例时出错", e);
|
||||||
} finally {
|
} finally {
|
||||||
@@ -66,5 +70,31 @@ public class PlaywrightManager {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 重新创建Playwright实例
|
||||||
|
*/
|
||||||
|
public void recreate() {
|
||||||
|
close();
|
||||||
|
try {
|
||||||
|
this.playwright = Playwright.create();
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("重新创建Playwright实例失败", e);
|
||||||
|
throw new RuntimeException("重新创建Playwright实例失败", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 创建带反检测配置的浏览器上下文选项
|
||||||
|
* @return 浏览器上下文选项
|
||||||
|
*/
|
||||||
|
public com.microsoft.playwright.Browser.NewContextOptions createStealthContextOptions() {
|
||||||
|
com.microsoft.playwright.Browser.NewContextOptions options = new com.microsoft.playwright.Browser.NewContextOptions()
|
||||||
|
.setViewportSize(1680, 900);
|
||||||
|
|
||||||
|
// 设置用户代理(可以设置为常用浏览器的用户代理)
|
||||||
|
// options.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");
|
||||||
|
|
||||||
|
return options;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
@@ -8,6 +8,9 @@ import jakarta.annotation.Resource;
|
|||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
|
||||||
|
|
||||||
@@ -19,11 +22,34 @@ public class FtbCrawlNetDy extends AbstractFtbCrawlNetBase implements FtbCrawlNe
|
|||||||
public void executeCookieIntercept() {
|
public void executeCookieIntercept() {
|
||||||
// 启动可见浏览器
|
// 启动可见浏览器
|
||||||
try (Browser browser = PlaywrightManager.getInstance().getPlaywright()
|
try (Browser browser = PlaywrightManager.getInstance().getPlaywright()
|
||||||
.chromium().launch(new BrowserType.LaunchOptions().setHeadless(false))) {
|
.chromium().launch(getLaunchOptions())) {
|
||||||
// 设置浏览器窗口大小为1920x1080
|
// 设置浏览器上下文选项以减少自动化检测
|
||||||
Browser.NewContextOptions contextOptions = new Browser.NewContextOptions()
|
Browser.NewContextOptions contextOptions = PlaywrightManager.getInstance().createStealthContextOptions();
|
||||||
.setViewportSize(1680, 900);
|
// 添加额外的反检测配置
|
||||||
|
Map<String, String> extraHTTPHeaders = new HashMap<>();
|
||||||
|
extraHTTPHeaders.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8");
|
||||||
|
contextOptions.setExtraHTTPHeaders(extraHTTPHeaders);
|
||||||
|
|
||||||
BrowserContext context = browser.newContext(contextOptions);
|
BrowserContext context = browser.newContext(contextOptions);
|
||||||
|
|
||||||
|
// Install the anti-automation-detection script for every page of this context.
// addInitScript evaluates the string as script source, so the overrides must be
// wrapped in an immediately-invoked function: a bare arrow function expression
// would only be created and never called, leaving navigator untouched.
context.addInitScript("""
        (() => {
            // Hide the webdriver flag that automation frameworks expose.
            Object.defineProperty(navigator, 'webdriver', {
                get: () => undefined,
            });

            // Report some common browser properties.
            Object.defineProperty(navigator, 'languages', {
                get: () => ['zh-CN', 'zh', 'en'],
            });

            Object.defineProperty(navigator, 'plugins', {
                get: () => [1, 2, 3, 4, 5],
            });
        })();
        """);
|
||||||
|
|
||||||
Page page = context.newPage();
|
Page page = context.newPage();
|
||||||
AtomicBoolean dyCookie = new AtomicBoolean(false);
|
AtomicBoolean dyCookie = new AtomicBoolean(false);
|
||||||
// 监听网络请求
|
// 监听网络请求
|
||||||
@@ -53,7 +79,35 @@ public class FtbCrawlNetDy extends AbstractFtbCrawlNetBase implements FtbCrawlNe
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 获取浏览器启动选项,减少自动化检测
|
||||||
|
* @return 浏览器启动选项
|
||||||
|
*/
|
||||||
|
private BrowserType.LaunchOptions getLaunchOptions() {
|
||||||
|
return new BrowserType.LaunchOptions()
|
||||||
|
.setHeadless(false)
|
||||||
|
// 添加这些参数减少自动化检测
|
||||||
|
.setArgs(Arrays.asList(
|
||||||
|
"--no-sandbox",
|
||||||
|
"--disable-blink-features=AutomationControlled",
|
||||||
|
"--disable-extensions",
|
||||||
|
"--disable-infobars",
|
||||||
|
"--disable-background-timer-throttling",
|
||||||
|
"--disable-backgrounding-occluded-windows",
|
||||||
|
"--disable-renderer-backgrounding",
|
||||||
|
"--disable-ipc-flooding-protection",
|
||||||
|
"--disable-background-networking",
|
||||||
|
"--disable-default-apps",
|
||||||
|
"--disable-features=TranslateUI",
|
||||||
|
"--disable-hang-monitor",
|
||||||
|
"--disable-prompt-on-repost",
|
||||||
|
"--force-color-profile=srgb",
|
||||||
|
"--metrics-recording-only",
|
||||||
|
"--no-first-run",
|
||||||
|
"--password-store=basic",
|
||||||
|
"--use-mock-keychain"
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -8,6 +8,9 @@ import jakarta.annotation.Resource;
|
|||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
|
||||||
@Component(value = "ftbCrawlNetMt")
|
@Component(value = "ftbCrawlNetMt")
|
||||||
@@ -18,11 +21,34 @@ public class FtbCrawlNetMt extends AbstractFtbCrawlNetBase implements FtbCrawlNe
|
|||||||
public void executeCookieIntercept() {
|
public void executeCookieIntercept() {
|
||||||
// 启动可见浏览器
|
// 启动可见浏览器
|
||||||
try (Browser browser = PlaywrightManager.getInstance().getPlaywright()
|
try (Browser browser = PlaywrightManager.getInstance().getPlaywright()
|
||||||
.chromium().launch(new BrowserType.LaunchOptions().setHeadless(false))) {
|
.chromium().launch(getLaunchOptions())) {
|
||||||
// 设置浏览器窗口大小为1920x1080
|
// Build the context options from the shared stealth settings (1680x900 viewport)
|
||||||
Browser.NewContextOptions contextOptions = new Browser.NewContextOptions()
|
Browser.NewContextOptions contextOptions = PlaywrightManager.getInstance().createStealthContextOptions();
|
||||||
.setViewportSize(1680, 900);
|
// 添加额外的反检测配置
|
||||||
|
Map<String, String> extraHTTPHeaders = new HashMap<>();
|
||||||
|
extraHTTPHeaders.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8");
|
||||||
|
contextOptions.setExtraHTTPHeaders(extraHTTPHeaders);
|
||||||
|
|
||||||
BrowserContext context = browser.newContext(contextOptions);
|
BrowserContext context = browser.newContext(contextOptions);
|
||||||
|
|
||||||
|
// Install the anti-automation-detection script for every page of this context.
// addInitScript evaluates the string as script source, so the overrides must be
// wrapped in an immediately-invoked function: a bare arrow function expression
// would only be created and never called, leaving navigator untouched.
context.addInitScript("""
        (() => {
            // Hide the webdriver flag that automation frameworks expose.
            Object.defineProperty(navigator, 'webdriver', {
                get: () => undefined,
            });

            // Report some common browser properties.
            Object.defineProperty(navigator, 'languages', {
                get: () => ['zh-CN', 'zh', 'en'],
            });

            Object.defineProperty(navigator, 'plugins', {
                get: () => [1, 2, 3, 4, 5],
            });
        })();
        """);
|
||||||
|
|
||||||
// 美团cookie
|
// 美团cookie
|
||||||
AtomicBoolean mtCookie = new AtomicBoolean(false);
|
AtomicBoolean mtCookie = new AtomicBoolean(false);
|
||||||
// 大众点评cookie
|
// 大众点评cookie
|
||||||
@@ -54,6 +80,36 @@ public class FtbCrawlNetMt extends AbstractFtbCrawlNetBase implements FtbCrawlNe
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 获取浏览器启动选项,减少自动化检测
|
||||||
|
* @return 浏览器启动选项
|
||||||
|
*/
|
||||||
|
private BrowserType.LaunchOptions getLaunchOptions() {
|
||||||
|
return new BrowserType.LaunchOptions()
|
||||||
|
.setHeadless(false)
|
||||||
|
// 添加这些参数减少自动化检测
|
||||||
|
.setArgs(Arrays.asList(
|
||||||
|
"--no-sandbox",
|
||||||
|
"--disable-blink-features=AutomationControlled",
|
||||||
|
"--disable-extensions",
|
||||||
|
"--disable-infobars",
|
||||||
|
"--disable-background-timer-throttling",
|
||||||
|
"--disable-backgrounding-occluded-windows",
|
||||||
|
"--disable-renderer-backgrounding",
|
||||||
|
"--disable-ipc-flooding-protection",
|
||||||
|
"--disable-background-networking",
|
||||||
|
"--disable-default-apps",
|
||||||
|
"--disable-features=TranslateUI",
|
||||||
|
"--disable-hang-monitor",
|
||||||
|
"--disable-prompt-on-repost",
|
||||||
|
"--force-color-profile=srgb",
|
||||||
|
"--metrics-recording-only",
|
||||||
|
"--no-first-run",
|
||||||
|
"--password-store=basic",
|
||||||
|
"--use-mock-keychain"
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 处理网络请求,提取需要的cookie信息
|
* 处理网络请求,提取需要的cookie信息
|
||||||
* @param request 网络请求
|
* @param request 网络请求
|
||||||
|
|||||||
Reference in New Issue
Block a user