diff --git a/src/main/java/com/fantaibao/config/PlaywrightManager.java b/src/main/java/com/fantaibao/config/PlaywrightManager.java index 63830ff..bd62c69 100644 --- a/src/main/java/com/fantaibao/config/PlaywrightManager.java +++ b/src/main/java/com/fantaibao/config/PlaywrightManager.java @@ -57,7 +57,11 @@ public class PlaywrightManager { public void close() { if (playwright != null) { try { + log.info("开始关闭Playwright实例"); + long startTime = System.currentTimeMillis(); playwright.close(); + long endTime = System.currentTimeMillis(); + log.info("Playwright实例关闭完成,耗时: {}ms", (endTime - startTime)); } catch (Exception e) { log.error("关闭Playwright实例时出错", e); } finally { @@ -66,5 +70,31 @@ public class PlaywrightManager { } } } - + + /** + * 重新创建Playwright实例 + */ + public void recreate() { + close(); + try { + this.playwright = Playwright.create(); + } catch (Exception e) { + log.error("重新创建Playwright实例失败", e); + throw new RuntimeException("重新创建Playwright实例失败", e); + } + } + + /** + * 创建带反检测配置的浏览器上下文选项 + * @return 浏览器上下文选项 + */ + public com.microsoft.playwright.Browser.NewContextOptions createStealthContextOptions() { + com.microsoft.playwright.Browser.NewContextOptions options = new com.microsoft.playwright.Browser.NewContextOptions() + .setViewportSize(1680, 900); + + // 设置用户代理(可以设置为常用浏览器的用户代理) + // options.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"); + + return options; + } } \ No newline at end of file diff --git a/src/main/java/com/fantaibao/service/FtbCrawlNetDy.java b/src/main/java/com/fantaibao/service/FtbCrawlNetDy.java index 6ecca94..8d12cf3 100644 --- a/src/main/java/com/fantaibao/service/FtbCrawlNetDy.java +++ b/src/main/java/com/fantaibao/service/FtbCrawlNetDy.java @@ -8,6 +8,9 @@ import jakarta.annotation.Resource; import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Component; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; import java.util.concurrent.atomic.AtomicBoolean; @@ -19,11 +22,34 @@ public class FtbCrawlNetDy extends AbstractFtbCrawlNetBase implements FtbCrawlNe public void executeCookieIntercept() { // 启动可见浏览器 try (Browser browser = PlaywrightManager.getInstance().getPlaywright() - .chromium().launch(new BrowserType.LaunchOptions().setHeadless(false))) { - // 设置浏览器窗口大小为1920x1080 - Browser.NewContextOptions contextOptions = new Browser.NewContextOptions() - .setViewportSize(1680, 900); + .chromium().launch(getLaunchOptions())) { + // 设置浏览器上下文选项以减少自动化检测 + Browser.NewContextOptions contextOptions = PlaywrightManager.getInstance().createStealthContextOptions(); + // 添加额外的反检测配置 + Map extraHTTPHeaders = new HashMap<>(); + extraHTTPHeaders.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + contextOptions.setExtraHTTPHeaders(extraHTTPHeaders); + BrowserContext context = browser.newContext(contextOptions); + + // 添加反自动化检测脚本 + context.addInitScript(""" + () => { + Object.defineProperty(navigator, 'webdriver', { + get: () => undefined, + }); + + // 添加一些常见的浏览器属性 + Object.defineProperty(navigator, 'languages', { + get: () => ['zh-CN', 'zh', 'en'], + }); + + Object.defineProperty(navigator, 'plugins', { + get: () => [1, 2, 3, 4, 5], + }); + } + """); + Page page = context.newPage(); AtomicBoolean dyCookie = new AtomicBoolean(false); // 监听网络请求 @@ -53,7 +79,35 @@ public class FtbCrawlNetDy extends AbstractFtbCrawlNetBase implements FtbCrawlNe } } - - + + /** + * 获取浏览器启动选项,减少自动化检测 + * @return 浏览器启动选项 + */ + private BrowserType.LaunchOptions getLaunchOptions() { + return new BrowserType.LaunchOptions() + .setHeadless(false) + // 添加这些参数减少自动化检测 + .setArgs(Arrays.asList( + "--no-sandbox", + "--disable-blink-features=AutomationControlled", + "--disable-extensions", + "--disable-infobars", + "--disable-background-timer-throttling", + "--disable-backgrounding-occluded-windows", + "--disable-renderer-backgrounding", + "--disable-ipc-flooding-protection", + "--disable-background-networking", + "--disable-default-apps", + "--disable-features=TranslateUI", + "--disable-hang-monitor", + "--disable-prompt-on-repost", + "--force-color-profile=srgb", + "--metrics-recording-only", + "--no-first-run", + "--password-store=basic", + "--use-mock-keychain" + )); + } } \ No newline at end of file diff --git a/src/main/java/com/fantaibao/service/FtbCrawlNetMt.java b/src/main/java/com/fantaibao/service/FtbCrawlNetMt.java index 536a866..3b44895 100644 --- a/src/main/java/com/fantaibao/service/FtbCrawlNetMt.java +++ b/src/main/java/com/fantaibao/service/FtbCrawlNetMt.java @@ -8,6 +8,9 @@ import jakarta.annotation.Resource; import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Component; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; import java.util.concurrent.atomic.AtomicBoolean; @Component(value = "ftbCrawlNetMt") @@ -18,11 +21,34 @@ public class FtbCrawlNetMt extends AbstractFtbCrawlNetBase implements FtbCrawlNe public void executeCookieIntercept() { // 启动可见浏览器 try (Browser browser = PlaywrightManager.getInstance().getPlaywright() - .chromium().launch(new BrowserType.LaunchOptions().setHeadless(false))) { + .chromium().launch(getLaunchOptions())) { // 设置浏览器窗口大小为1920x1080 - Browser.NewContextOptions contextOptions = new Browser.NewContextOptions() - .setViewportSize(1680, 900); + Browser.NewContextOptions contextOptions = PlaywrightManager.getInstance().createStealthContextOptions(); + // 添加额外的反检测配置 + Map extraHTTPHeaders = new HashMap<>(); + extraHTTPHeaders.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + contextOptions.setExtraHTTPHeaders(extraHTTPHeaders); + BrowserContext context = browser.newContext(contextOptions); + + // 添加反自动化检测脚本 + context.addInitScript(""" + () => { + Object.defineProperty(navigator, 'webdriver', { + get: () => undefined, + }); + + // 添加一些常见的浏览器属性 + Object.defineProperty(navigator, 'languages', { + get: () => ['zh-CN', 'zh', 'en'], + }); + + Object.defineProperty(navigator, 'plugins', { + get: () => [1, 2, 3, 4, 5], + }); + } + """); + // 美团cookie AtomicBoolean mtCookie = new AtomicBoolean(false); // 大众点评cookie @@ -54,6 +80,36 @@ public class FtbCrawlNetMt extends AbstractFtbCrawlNetBase implements FtbCrawlNe } } + /** + * 获取浏览器启动选项,减少自动化检测 + * @return 浏览器启动选项 + */ + private BrowserType.LaunchOptions getLaunchOptions() { + return new BrowserType.LaunchOptions() + .setHeadless(false) + // 添加这些参数减少自动化检测 + .setArgs(Arrays.asList( + "--no-sandbox", + "--disable-blink-features=AutomationControlled", + "--disable-extensions", + "--disable-infobars", + "--disable-background-timer-throttling", + "--disable-backgrounding-occluded-windows", + "--disable-renderer-backgrounding", + "--disable-ipc-flooding-protection", + "--disable-background-networking", + "--disable-default-apps", + "--disable-features=TranslateUI", + "--disable-hang-monitor", + "--disable-prompt-on-repost", + "--force-color-profile=srgb", + "--metrics-recording-only", + "--no-first-run", + "--password-store=basic", + "--use-mock-keychain" + )); + } + /** * 处理网络请求,提取需要的cookie信息 * @param request 网络请求