How to Scrape Google Search Results with Proxies
Google maintains the most sophisticated anti-bot infrastructure on the internet. Scraping Google SERPs (Search Engine Results Pages) at scale requires residential proxies, intelligent rotation, and careful request pacing. This guide covers production-grade SERP scraping patterns.
**Disclaimer**: Google's Terms of Service restrict automated access. Consider using the official Google Custom Search API for legitimate use cases. This guide covers technical implementation for educational purposes. Ensure your practices comply with applicable terms and laws.
Why Google Scraping Demands Proxies
Google blocks scrapers through:

- IP reputation scoring with instant datacenter IP detection
- CAPTCHA challenges (reCAPTCHA v3) on suspicious traffic
- Behavioral analysis of request timing and patterns
- Geographic inconsistency detection
SERP Scraper Architecture
```
import httpx
import random
import time
from dataclasses import dataclass
from urllib.parse import urlencode

from bs4 import BeautifulSoup


@dataclass(frozen=True)
class SearchResult:
    position: int
    title: str
    url: str
    snippet: str
    query: str
    country: str


def build_google_url(query: str, country: str = "us", num: int = 10) -> str:
    """Build a Google search URL with localization parameters."""
    params = {
        "q": query,
        "num": num,
        "hl": "en",
        "gl": country,
    }
    tld = {"us": "com", "gb": "co.uk", "de": "de", "fr": "fr", "jp": "co.jp"}.get(country, "com")
    return f"https://www.google.{tld}/search?{urlencode(params)}"


def scrape_serp(
    query: str,
    country: str,
    username: str,
    password: str,
) -> list[SearchResult]:
    """Scrape Google SERP for a query from a specific country."""
    proxy = f"http://{username}-country-{country}:{password}@gate.hexproxies.com:8080"
    url = build_google_url(query, country)

    # Randomized delay before every request -- Google profiles timing patterns.
    time.sleep(random.uniform(5.0, 10.0))

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": f"en-{'US' if country == 'us' else country.upper()},en;q=0.9",
        "Accept-Encoding": "gzip, deflate, br",
    }

    with httpx.Client(proxy=proxy, timeout=30, follow_redirects=True) as client:
        resp = client.get(url, headers=headers)
        if resp.status_code != 200:
            return []

    soup = BeautifulSoup(resp.text, "html.parser")
    results: list[SearchResult] = []

    # Each organic result is wrapped in a div.g container.
    for i, div in enumerate(soup.select("div.g"), start=1):
        title_el = div.select_one("h3")
        link_el = div.select_one("a")
        snippet_el = div.select_one("div.VwiC3b")

        if title_el and link_el:
            results.append(SearchResult(
                position=i,
                title=title_el.text.strip(),
                url=link_el.get("href", ""),
                snippet=snippet_el.text.strip() if snippet_el else "",
                query=query,
                country=country,
            ))

    return results
```
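For reference, a minimal call to the scraper above might look like the following; the query, country, and credentials are placeholder values:

```
# Placeholder credentials -- substitute your own proxy account details.
results = scrape_serp(
    query="best running shoes",
    country="us",
    username="YOUR_USERNAME",
    password="YOUR_PASSWORD",
)

for r in results:
    print(f"{r.position:>2}. {r.title} -> {r.url}")
```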
Keyword Rank Tracking
```
from datetime import datetime


@dataclass(frozen=True)
class RankResult:
    keyword: str
    domain: str
    position: int  # 0 = not found
    country: str
    timestamp: str


def track_ranking(
    keyword: str,
    target_domain: str,
    country: str,
    username: str,
    password: str,
) -> RankResult:
    """Find where a domain ranks for a keyword."""
    results = scrape_serp(keyword, country, username, password)

    for result in results:
        if target_domain in result.url:
            return RankResult(
                keyword=keyword,
                domain=target_domain,
                position=result.position,
                country=country,
                timestamp=datetime.utcnow().isoformat(),
            )

    # Domain not found in the scraped results.
    return RankResult(
        keyword=keyword,
        domain=target_domain,
        position=0,
        country=country,
        timestamp=datetime.utcnow().isoformat(),
    )
```
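A simple rank-tracking run can then loop over a keyword list; the keywords and domain below are purely illustrative, and pacing is already handled by the delay inside scrape_serp:

```
# Illustrative keywords and domain -- replace with your own targets.
keywords = ["running shoes", "trail running shoes", "marathon training plan"]

for kw in keywords:
    rank = track_ranking(kw, "example.com", "us", "YOUR_USERNAME", "YOUR_PASSWORD")
    status = f"#{rank.position}" if rank.position else "not in top results"
    print(f"{kw!r}: {status} ({rank.timestamp})")
```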
Multi-Country SERP Comparison
Track how search results vary by country — critical for international SEO:
```
def compare_serps(
    keyword: str,
    countries: list[str],
    username: str,
    password: str,
) -> dict[str, list[SearchResult]]:
    """Compare search results across multiple countries."""
    results: dict[str, list[SearchResult]] = {}

    for country in countries:
        results[country] = scrape_serp(keyword, country, username, password)
        # Extra pause between countries, on top of the per-request delay.
        time.sleep(random.uniform(8.0, 15.0))

    return results
```

Anti-Detection Patterns
- Use 5-15 second delays between searches — Google monitors timing patterns
- Rotate residential IPs per search — never reuse an IP for consecutive queries
- Vary search parameters — change num, hl, gl to mimic organic behavior
- Match proxy country to search locale — US proxy for google.com, UK proxy for google.co.uk
- Implement session breaks — pause for 2-5 minutes after every 20-30 searches
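Putting the pacing rules together, the sketch below shows one way to batch searches with per-query delays and periodic session breaks. It assumes the scrape_serp function and imports defined earlier; the run_search_batch name and batch_size parameter are illustrative, not a prescribed API:

```
def run_search_batch(
    queries: list[str],
    country: str,
    username: str,
    password: str,
    batch_size: int = 25,
) -> list[SearchResult]:
    """Run a list of queries with a session break every `batch_size` searches."""
    all_results: list[SearchResult] = []

    for i, query in enumerate(queries, start=1):
        # scrape_serp already sleeps 5-10 seconds before each request.
        all_results.extend(scrape_serp(query, country, username, password))

        # Session break: pause 2-5 minutes after every batch of searches.
        if i % batch_size == 0 and i < len(queries):
            time.sleep(random.uniform(120.0, 300.0))

    return all_results
```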
CAPTCHA Detection
```
def is_google_captcha(html: str) -> bool:
    """Detect Google's block/CAPTCHA interstitial from the page content."""
    signals = ["unusual traffic", "captcha", "sorry/index", "recaptcha"]
    html_lower = html.lower()
    return any(s in html_lower for s in signals)
```

Hex Proxies' residential network provides the IP diversity essential for Google SERP scraping. With geo-targeting across 195+ countries, you can track rankings from any search locale.