feat(playwright): 引入浏览器反检测配置工具类
新增 `BrowserStealthConfig` 工具类,统一管理 Playwright 的反自动化检测配置。该变更将原本分散在 Dy 和 Mt 爬虫服务中的浏览器启动选项、上下文配置及初始化脚本进行抽取,集中到新的配置类中,提升代码复用性和可维护性。
- 添加 `getStealthLaunchOptions()` 方法,用于获取标准的反检测浏览器启动参数
- 添加 `configureStealthContext()` 方法,封装 navigator 属性伪装逻辑
- 添加 `getStealthContextOptions()` 方法,提供带语言头的上下文配置
- 在 Dy 和 Mt 爬虫服务中移除重复配置代码,改为调用新工具类方法
- 移除了不再使用的 import 语句及相关冗余变量声明
This commit is contained in:
86
src/main/java/com/fantaibao/config/BrowserStealthConfig.java
Normal file
86
src/main/java/com/fantaibao/config/BrowserStealthConfig.java
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
package com.fantaibao.config;
|
||||||
|
|
||||||
|
import com.microsoft.playwright.Browser;
|
||||||
|
import com.microsoft.playwright.BrowserContext;
|
||||||
|
import com.microsoft.playwright.BrowserType;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 浏览器反检测配置工具类
|
||||||
|
* 用于提供统一的浏览器反自动化检测配置
|
||||||
|
*/
|
||||||
|
public class BrowserStealthConfig {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 获取浏览器启动选项,减少自动化检测
|
||||||
|
*
|
||||||
|
* @return 浏览器启动选项
|
||||||
|
*/
|
||||||
|
public static BrowserType.LaunchOptions getStealthLaunchOptions() {
|
||||||
|
return new BrowserType.LaunchOptions()
|
||||||
|
.setHeadless(false)
|
||||||
|
// 添加这些参数减少自动化检测
|
||||||
|
.setArgs(Arrays.asList(
|
||||||
|
"--no-sandbox",
|
||||||
|
"--disable-blink-features=AutomationControlled",
|
||||||
|
"--disable-extensions",
|
||||||
|
"--disable-infobars",
|
||||||
|
"--disable-background-timer-throttling",
|
||||||
|
"--disable-backgrounding-occluded-windows",
|
||||||
|
"--disable-renderer-backgrounding",
|
||||||
|
"--disable-ipc-flooding-protection",
|
||||||
|
"--disable-background-networking",
|
||||||
|
"--disable-default-apps",
|
||||||
|
"--disable-features=TranslateUI",
|
||||||
|
"--disable-hang-monitor",
|
||||||
|
"--disable-prompt-on-repost",
|
||||||
|
"--force-color-profile=srgb",
|
||||||
|
"--metrics-recording-only",
|
||||||
|
"--no-first-run",
|
||||||
|
"--password-store=basic",
|
||||||
|
"--use-mock-keychain"
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 配置浏览器上下文,添加反检测设置
|
||||||
|
*
|
||||||
|
* @param context 浏览器上下文
|
||||||
|
*/
|
||||||
|
public static void configureStealthContext(BrowserContext context) {
|
||||||
|
// 添加反自动化检测脚本
|
||||||
|
context.addInitScript("""
|
||||||
|
() => {
|
||||||
|
Object.defineProperty(navigator, 'webdriver', {
|
||||||
|
get: () => undefined,
|
||||||
|
});
|
||||||
|
|
||||||
|
// 添加一些常见的浏览器属性
|
||||||
|
Object.defineProperty(navigator, 'languages', {
|
||||||
|
get: () => ['zh-CN', 'zh', 'en'],
|
||||||
|
});
|
||||||
|
|
||||||
|
Object.defineProperty(navigator, 'plugins', {
|
||||||
|
get: () => [1, 2, 3, 4, 5],
|
||||||
|
});
|
||||||
|
}
|
||||||
|
""");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 获取带反检测配置的浏览器上下文选项
|
||||||
|
*
|
||||||
|
* @return 浏览器上下文选项
|
||||||
|
*/
|
||||||
|
public static Browser.NewContextOptions getStealthContextOptions() {
|
||||||
|
Browser.NewContextOptions options = PlaywrightManager.getInstance().createStealthContextOptions();
|
||||||
|
// 添加额外的反检测配置
|
||||||
|
Map<String, String> extraHTTPHeaders = new HashMap<>();
|
||||||
|
extraHTTPHeaders.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8");
|
||||||
|
options.setExtraHTTPHeaders(extraHTTPHeaders);
|
||||||
|
return options;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,16 +1,13 @@
|
|||||||
package com.fantaibao.service;
|
package com.fantaibao.service;
|
||||||
|
|
||||||
import com.fantaibao.base.FtbCrawlNetBase;
|
import com.fantaibao.base.FtbCrawlNetBase;
|
||||||
|
import com.fantaibao.config.BrowserStealthConfig;
|
||||||
import com.fantaibao.config.GlobalConfig;
|
import com.fantaibao.config.GlobalConfig;
|
||||||
import com.fantaibao.config.PlaywrightManager;
|
import com.fantaibao.config.PlaywrightManager;
|
||||||
import com.microsoft.playwright.*;
|
import com.microsoft.playwright.*;
|
||||||
import jakarta.annotation.Resource;
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
|
||||||
|
|
||||||
@@ -22,33 +19,12 @@ public class FtbCrawlNetDy extends AbstractFtbCrawlNetBase implements FtbCrawlNe
|
|||||||
public void executeCookieIntercept() {
|
public void executeCookieIntercept() {
|
||||||
// 启动可见浏览器
|
// 启动可见浏览器
|
||||||
try (Browser browser = PlaywrightManager.getInstance().getPlaywright()
|
try (Browser browser = PlaywrightManager.getInstance().getPlaywright()
|
||||||
.chromium().launch(getLaunchOptions())) {
|
.chromium().launch(BrowserStealthConfig.getStealthLaunchOptions())) {
|
||||||
// 设置浏览器上下文选项以减少自动化检测
|
// 设置浏览器上下文选项以减少自动化检测
|
||||||
Browser.NewContextOptions contextOptions = PlaywrightManager.getInstance().createStealthContextOptions();
|
BrowserContext context = browser.newContext(BrowserStealthConfig.getStealthContextOptions());
|
||||||
// 添加额外的反检测配置
|
|
||||||
Map<String, String> extraHTTPHeaders = new HashMap<>();
|
|
||||||
extraHTTPHeaders.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8");
|
|
||||||
contextOptions.setExtraHTTPHeaders(extraHTTPHeaders);
|
|
||||||
|
|
||||||
BrowserContext context = browser.newContext(contextOptions);
|
|
||||||
|
|
||||||
// 添加反自动化检测脚本
|
// 添加反自动化检测脚本
|
||||||
context.addInitScript("""
|
BrowserStealthConfig.configureStealthContext(context);
|
||||||
() => {
|
|
||||||
Object.defineProperty(navigator, 'webdriver', {
|
|
||||||
get: () => undefined,
|
|
||||||
});
|
|
||||||
|
|
||||||
// 添加一些常见的浏览器属性
|
|
||||||
Object.defineProperty(navigator, 'languages', {
|
|
||||||
get: () => ['zh-CN', 'zh', 'en'],
|
|
||||||
});
|
|
||||||
|
|
||||||
Object.defineProperty(navigator, 'plugins', {
|
|
||||||
get: () => [1, 2, 3, 4, 5],
|
|
||||||
});
|
|
||||||
}
|
|
||||||
""");
|
|
||||||
|
|
||||||
Page page = context.newPage();
|
Page page = context.newPage();
|
||||||
AtomicBoolean dyCookie = new AtomicBoolean(false);
|
AtomicBoolean dyCookie = new AtomicBoolean(false);
|
||||||
@@ -80,34 +56,4 @@ public class FtbCrawlNetDy extends AbstractFtbCrawlNetBase implements FtbCrawlNe
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* 获取浏览器启动选项,减少自动化检测
|
|
||||||
* @return 浏览器启动选项
|
|
||||||
*/
|
|
||||||
private BrowserType.LaunchOptions getLaunchOptions() {
|
|
||||||
return new BrowserType.LaunchOptions()
|
|
||||||
.setHeadless(false)
|
|
||||||
// 添加这些参数减少自动化检测
|
|
||||||
.setArgs(Arrays.asList(
|
|
||||||
"--no-sandbox",
|
|
||||||
"--disable-blink-features=AutomationControlled",
|
|
||||||
"--disable-extensions",
|
|
||||||
"--disable-infobars",
|
|
||||||
"--disable-background-timer-throttling",
|
|
||||||
"--disable-backgrounding-occluded-windows",
|
|
||||||
"--disable-renderer-backgrounding",
|
|
||||||
"--disable-ipc-flooding-protection",
|
|
||||||
"--disable-background-networking",
|
|
||||||
"--disable-default-apps",
|
|
||||||
"--disable-features=TranslateUI",
|
|
||||||
"--disable-hang-monitor",
|
|
||||||
"--disable-prompt-on-repost",
|
|
||||||
"--force-color-profile=srgb",
|
|
||||||
"--metrics-recording-only",
|
|
||||||
"--no-first-run",
|
|
||||||
"--password-store=basic",
|
|
||||||
"--use-mock-keychain"
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -1,16 +1,13 @@
|
|||||||
package com.fantaibao.service;
|
package com.fantaibao.service;
|
||||||
|
|
||||||
import com.fantaibao.base.FtbCrawlNetBase;
|
import com.fantaibao.base.FtbCrawlNetBase;
|
||||||
|
import com.fantaibao.config.BrowserStealthConfig;
|
||||||
import com.fantaibao.config.GlobalConfig;
|
import com.fantaibao.config.GlobalConfig;
|
||||||
import com.fantaibao.config.PlaywrightManager;
|
import com.fantaibao.config.PlaywrightManager;
|
||||||
import com.microsoft.playwright.*;
|
import com.microsoft.playwright.*;
|
||||||
import jakarta.annotation.Resource;
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
|
||||||
@Component(value = "ftbCrawlNetMt")
|
@Component(value = "ftbCrawlNetMt")
|
||||||
@@ -21,33 +18,12 @@ public class FtbCrawlNetMt extends AbstractFtbCrawlNetBase implements FtbCrawlNe
|
|||||||
public void executeCookieIntercept() {
|
public void executeCookieIntercept() {
|
||||||
// 启动可见浏览器
|
// 启动可见浏览器
|
||||||
try (Browser browser = PlaywrightManager.getInstance().getPlaywright()
|
try (Browser browser = PlaywrightManager.getInstance().getPlaywright()
|
||||||
.chromium().launch(getLaunchOptions())) {
|
.chromium().launch(BrowserStealthConfig.getStealthLaunchOptions())) {
|
||||||
// 设置浏览器窗口大小为1920x1080
|
// 设置浏览器上下文选项以减少自动化检测
|
||||||
Browser.NewContextOptions contextOptions = PlaywrightManager.getInstance().createStealthContextOptions();
|
BrowserContext context = browser.newContext(BrowserStealthConfig.getStealthContextOptions());
|
||||||
// 添加额外的反检测配置
|
|
||||||
Map<String, String> extraHTTPHeaders = new HashMap<>();
|
|
||||||
extraHTTPHeaders.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8");
|
|
||||||
contextOptions.setExtraHTTPHeaders(extraHTTPHeaders);
|
|
||||||
|
|
||||||
BrowserContext context = browser.newContext(contextOptions);
|
|
||||||
|
|
||||||
// 添加反自动化检测脚本
|
// 添加反自动化检测脚本
|
||||||
context.addInitScript("""
|
BrowserStealthConfig.configureStealthContext(context);
|
||||||
() => {
|
|
||||||
Object.defineProperty(navigator, 'webdriver', {
|
|
||||||
get: () => undefined,
|
|
||||||
});
|
|
||||||
|
|
||||||
// 添加一些常见的浏览器属性
|
|
||||||
Object.defineProperty(navigator, 'languages', {
|
|
||||||
get: () => ['zh-CN', 'zh', 'en'],
|
|
||||||
});
|
|
||||||
|
|
||||||
Object.defineProperty(navigator, 'plugins', {
|
|
||||||
get: () => [1, 2, 3, 4, 5],
|
|
||||||
});
|
|
||||||
}
|
|
||||||
""");
|
|
||||||
|
|
||||||
// 美团cookie
|
// 美团cookie
|
||||||
AtomicBoolean mtCookie = new AtomicBoolean(false);
|
AtomicBoolean mtCookie = new AtomicBoolean(false);
|
||||||
@@ -80,36 +56,6 @@ public class FtbCrawlNetMt extends AbstractFtbCrawlNetBase implements FtbCrawlNe
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* 获取浏览器启动选项,减少自动化检测
|
|
||||||
* @return 浏览器启动选项
|
|
||||||
*/
|
|
||||||
private BrowserType.LaunchOptions getLaunchOptions() {
|
|
||||||
return new BrowserType.LaunchOptions()
|
|
||||||
.setHeadless(false)
|
|
||||||
// 添加这些参数减少自动化检测
|
|
||||||
.setArgs(Arrays.asList(
|
|
||||||
"--no-sandbox",
|
|
||||||
"--disable-blink-features=AutomationControlled",
|
|
||||||
"--disable-extensions",
|
|
||||||
"--disable-infobars",
|
|
||||||
"--disable-background-timer-throttling",
|
|
||||||
"--disable-backgrounding-occluded-windows",
|
|
||||||
"--disable-renderer-backgrounding",
|
|
||||||
"--disable-ipc-flooding-protection",
|
|
||||||
"--disable-background-networking",
|
|
||||||
"--disable-default-apps",
|
|
||||||
"--disable-features=TranslateUI",
|
|
||||||
"--disable-hang-monitor",
|
|
||||||
"--disable-prompt-on-repost",
|
|
||||||
"--force-color-profile=srgb",
|
|
||||||
"--metrics-recording-only",
|
|
||||||
"--no-first-run",
|
|
||||||
"--password-store=basic",
|
|
||||||
"--use-mock-keychain"
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 处理网络请求,提取需要的cookie信息
|
* 处理网络请求,提取需要的cookie信息
|
||||||
* @param request 网络请求
|
* @param request 网络请求
|
||||||
|
|||||||
Reference in New Issue
Block a user