From f3ea90d35263fb3fe85a78b412bcd95d1691d05f Mon Sep 17 00:00:00 2001 From: Bruno Mendes Date: Tue, 4 Nov 2025 09:35:44 +0000 Subject: [PATCH] Add Datadog Synthetics UA to spiders --- regexes.yaml | 8 ++++++-- tests/test_device.yaml | 5 +++++ tests/test_ua.yaml | 16 +++++++++++----- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/regexes.yaml b/regexes.yaml index 7b12c52b..46347dbd 100644 --- a/regexes.yaml +++ b/regexes.yaml @@ -104,10 +104,14 @@ user_agent_parsers: # Salesforce - regex: '(Salesforce)(?:.)\/(\d+)\.(\d?)' - #StatusCake + # StatusCake - regex: '(\(StatusCake\))' family_replacement: 'StatusCakeBot' + # Datadog + - regex: '(Datadog|DataDog)\/Synthetics' + family_replacement: 'DatadogSynthetics' + # Facebook - regex: '(facebookexternalhit)/(\d+)\.(\d+)' family_replacement: 'FacebookBot' @@ -6032,7 +6036,7 @@ device_parsers: ########## # Spiders (this is a hack...) ########## - - regex: '^.{0,100}(bot|BUbiNG|zao|borg|DBot|oegp|silk|Xenu|zeal|^NING|CCBot|crawl|htdig|lycos|slurp|teoma|voila|yahoo|Sogou|CiBra|Nutch|^Java/|^JNLP/|Daumoa|Daum|Genieo|ichiro|larbin|pompos|Scrapy|snappy|speedy|spider|msnbot|msrbot|vortex|^vortex|crawler|favicon|indexer|Riddler|scooter|scraper|scrubby|WhatWeb|WinHTTP|bingbot|BingPreview|openbot|gigabot|furlbot|polybot|seekbot|^voyager|archiver|Icarus6j|mogimogi|Netvibes|blitzbot|altavista|charlotte|findlinks|Retreiver|TLSProber|WordPress|SeznamBot|ProoXiBot|wsr\-agent|Squrl Java|EtaoSpider|PaperLiBot|SputnikBot|A6\-Indexer|netresearch|searchsight|baiduspider|YisouSpider|ICC\-Crawler|http%20client|Python-urllib|dataparksearch|converacrawler|Screaming Frog|AppEngine-Google|YahooCacheSystem|fast\-webcrawler|Sogou Pic Spider|semanticdiscovery|Innovazion Crawler|facebookexternalhit|Google.{0,200}/\+/web/snippet|Google-HTTP-Java-Client|BlogBridge|IlTrovatore-Setaccio|InternetArchive|GomezAgent|WebThumbnail|heritrix|NewsGator|PagePeeker|Reaper|ZooShot|holmes|NL-Crawler|Pingdom|StatusCake|WhatsApp|masscan|Google Web Preview|Qwantify|Yeti|OgScrper|RecipeRadar|GPTBot|Google-InspectionTool)' + - regex: '^.{0,100}(bot|BUbiNG|zao|borg|DBot|oegp|silk|Xenu|zeal|^NING|CCBot|crawl|htdig|lycos|slurp|teoma|voila|yahoo|Sogou|CiBra|Nutch|^Java/|^JNLP/|Daumoa|Daum|Genieo|ichiro|larbin|pompos|Scrapy|snappy|speedy|spider|msnbot|msrbot|vortex|^vortex|crawler|favicon|indexer|Riddler|scooter|scraper|scrubby|WhatWeb|WinHTTP|bingbot|BingPreview|openbot|gigabot|furlbot|polybot|seekbot|^voyager|archiver|Icarus6j|mogimogi|Netvibes|blitzbot|altavista|charlotte|findlinks|Retreiver|TLSProber|WordPress|SeznamBot|ProoXiBot|wsr\-agent|Squrl Java|EtaoSpider|PaperLiBot|SputnikBot|A6\-Indexer|netresearch|searchsight|baiduspider|YisouSpider|ICC\-Crawler|http%20client|Python-urllib|dataparksearch|converacrawler|Screaming Frog|AppEngine-Google|YahooCacheSystem|fast\-webcrawler|Sogou Pic Spider|semanticdiscovery|Innovazion Crawler|facebookexternalhit|Google.{0,200}/\+/web/snippet|Google-HTTP-Java-Client|BlogBridge|IlTrovatore-Setaccio|InternetArchive|GomezAgent|WebThumbnail|heritrix|NewsGator|PagePeeker|Reaper|ZooShot|holmes|NL-Crawler|Pingdom|StatusCake|Datadog|WhatsApp|masscan|Google Web Preview|Qwantify|Yeti|OgScrper|RecipeRadar|GPTBot|Google-InspectionTool)' regex_flag: 'i' device_replacement: 'Spider' brand_replacement: 'Spider' diff --git a/tests/test_device.yaml b/tests/test_device.yaml index f1e41204..8fa97ae2 100644 --- a/tests/test_device.yaml +++ b/tests/test_device.yaml @@ -740,6 +740,11 @@ test_cases: brand: 'Spider' model: 'Desktop' + - user_agent_string: 'Datadog/Synthetics' + family: 'Spider' + brand: 'Spider' + model: 'Desktop' + - user_agent_string: 'Mozilla/5.0 (Linux; Android 4.0.4; SAMSUNG-SGH-I717 Build/IMM76D) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.131 Mobile Safari/537.36' family: 'Samsung SGH-I717' brand: 'Samsung' diff --git a/tests/test_ua.yaml b/tests/test_ua.yaml index 07dae4a8..a5e0913b 100644 --- a/tests/test_ua.yaml +++ b/tests/test_ua.yaml @@ -1061,6 +1061,12 @@ test_cases: minor: patch: + - user_agent_string: 'Datadog/Synthetics' + family: 'DatadogSynthetics' + major: + minor: + patch: + - user_agent_string: 'Mozilla/3.0 (Planetweb/2.100 JS SSL US; Dreamcast US)' family: 'Planetweb' major: '2' @@ -7913,9 +7919,9 @@ test_cases: - user_agent_string: 'Mozilla/5.0 (iPhone; CPU iPhone OS 17_5_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 BytedanceWebview/d8a21c6 trill_34.9.0 JsSdk/2.0 NetType/4G Channel/App Store ByteLocale/en Region/MY FalconTag/31CFA6E0-CAE5-4ECF-A13A-ADFCAFC2428F' family: 'TikTok' - major: - minor: - patch: + major: + minor: + patch: - user_agent_string: 'Mozilla/5.0 (iPhone; CPU iPhone OS 18_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 Safari/604.1 KAKAOTALK/11.3.1 (INAPP)' family: 'KakaoTalk' @@ -8145,7 +8151,7 @@ test_cases: major: '1' minor: '4' patch: '12' - + - user_agent_string: 'aws-sdk-go-v2/1.24.1 os/linux lang/go#1.20.4 md/GOOS#linux md/GOARCH#arm64 api/sts#1.26.7' family: 'aws-sdk-go-v2' major: '1' @@ -9129,7 +9135,7 @@ test_cases: major: minor: patch: - + - user_agent_string: 'Mozilla/5.0 (X11; Linux aarch64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.200 Safari/537.36 Qaxbrowser' family: 'QAX Browser' major: