Python Proxy Configuration Overview
Python is the most popular language for web scraping, data collection, and API automation. Every major Python HTTP library supports proxy configuration, and Hex Proxies integrates seamlessly with all of them. This guide covers the four most-used libraries: requests, Scrapy, Selenium, and aiohttp.
Requests Library
The `requests` library is the most straightforward way to use proxies in Python.
Basic Proxy Setup
import requests

proxies = {
    "http": "http://YOUR_USERNAME:YOUR_PASSWORD@gate.hexproxies.com:8080",
    "https": "http://YOUR_USERNAME:YOUR_PASSWORD@gate.hexproxies.com:8080",
}

response = requests.get("https://httpbin.org/ip", proxies=proxies, timeout=30)
print(response.json())
SOCKS5 Proxy with Requests
Install the SOCKS support package first:
pip install requests[socks]

Then configure:
proxies = {
    "http": "socks5h://YOUR_USERNAME:YOUR_PASSWORD@gate.hexproxies.com:1080",
    "https": "socks5h://YOUR_USERNAME:YOUR_PASSWORD@gate.hexproxies.com:1080",
}

Use `socks5h://` (with the `h`) to route DNS through the proxy. Plain `socks5://` resolves DNS locally, which can leak your real location.
Session-Based Proxy
For multiple requests with the same proxy configuration:
import requests

session = requests.Session()
session.proxies = {
    "http": "http://YOUR_USERNAME:YOUR_PASSWORD@gate.hexproxies.com:8080",
    "https": "http://YOUR_USERNAME:YOUR_PASSWORD@gate.hexproxies.com:8080",
}

response = session.get("https://httpbin.org/ip", timeout=30)
print(response.json())
Rotating Proxies with Hex Proxies
Generate unique session identifiers to rotate IPs:
import uuid

import requests


def make_request(url, country=None):
    session_id = uuid.uuid4().hex[:8]
    username = f"YOUR_USERNAME-session-{session_id}"
    if country:
        username = f"YOUR_USERNAME-country-{country}-session-{session_id}"

    proxies = {
        "http": f"http://{username}:YOUR_PASSWORD@gate.hexproxies.com:8080",
        "https": f"http://{username}:YOUR_PASSWORD@gate.hexproxies.com:8080",
    }

    response = requests.get(url, proxies=proxies, timeout=30)
    return response


# Each call gets a different IP
for i in range(5):
    resp = make_request("https://httpbin.org/ip")
    print(f"Request {i+1}: {resp.json()['origin']}")
Sticky Sessions
To maintain the same IP across multiple requests, reuse the same session identifier:
import uuid

import requests

# Generate one identifier and reuse it for every request
STICKY_SESSION = uuid.uuid4().hex[:8]

proxies = {
    "http": f"http://YOUR_USERNAME-session-{STICKY_SESSION}:YOUR_PASSWORD@gate.hexproxies.com:8080",
    "https": f"http://YOUR_USERNAME-session-{STICKY_SESSION}:YOUR_PASSWORD@gate.hexproxies.com:8080",
}

# All requests use the same IP
for page in range(1, 11):
    response = requests.get(f"https://example.com/page/{page}", proxies=proxies, timeout=30)
    print(f"Page {page}: {response.status_code}")
Scrapy Middleware
Scrapy is the industry-standard web scraping framework for Python. Integrate Hex Proxies through a custom downloader middleware.
Creating the Proxy Middleware
Create `middlewares/proxy_middleware.py`:
import uuid


class HexProxiesMiddleware:
    def __init__(self, proxy_host, proxy_port, username, password, rotate=True):
        self.proxy_host = proxy_host
        self.proxy_port = proxy_port
        self.username = username
        self.password = password
        self.rotate = rotate

    @classmethod
    def from_crawler(cls, crawler):
        return cls(
            proxy_host=crawler.settings.get("HEX_PROXY_HOST", "gate.hexproxies.com"),
            proxy_port=crawler.settings.get("HEX_PROXY_PORT", "8080"),
            username=crawler.settings.get("HEX_PROXY_USER"),
            password=crawler.settings.get("HEX_PROXY_PASS"),
            rotate=crawler.settings.getbool("HEX_PROXY_ROTATE", True),
        )

    def process_request(self, request, spider):
        username = self.username
        if self.rotate:
            session_id = uuid.uuid4().hex[:8]
            username = f"{self.username}-session-{session_id}"

        proxy_url = f"http://{username}:{self.password}@{self.proxy_host}:{self.proxy_port}"
        request.meta["proxy"] = proxy_url
Scrapy Settings
Add to `settings.py`:
DOWNLOADER_MIDDLEWARES = {
    "myproject.middlewares.proxy_middleware.HexProxiesMiddleware": 350,
}

HEX_PROXY_HOST = "gate.hexproxies.com"
HEX_PROXY_PORT = "8080"
HEX_PROXY_USER = "YOUR_USERNAME"
HEX_PROXY_PASS = "YOUR_PASSWORD"
HEX_PROXY_ROTATE = True

# Recommended settings for proxy scraping
CONCURRENT_REQUESTS = 16
DOWNLOAD_DELAY = 0.5
RETRY_TIMES = 3
RETRY_HTTP_CODES = [403, 429, 500, 502, 503]
Country-Targeted Scraping
Modify the middleware to support per-request country targeting:
def process_request(self, request, spider):
    username = self.username
    # Generate the session identifier up front so both branches can use it
    session_id = uuid.uuid4().hex[:8]
    country = request.meta.get("proxy_country")

    if country:
        username = f"{self.username}-country-{country}-session-{session_id}"
    elif self.rotate:
        username = f"{self.username}-session-{session_id}"

    proxy_url = f"http://{username}:{self.password}@{self.proxy_host}:{self.proxy_port}"
    request.meta["proxy"] = proxy_url
Use in your spider:
yield scrapy.Request(
    url="https://example.com",
    meta={"proxy_country": "us"},
)

Selenium WebDriver
Selenium automates real browsers, making proxy configuration different from pure HTTP libraries.
Chrome with Proxy
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

chrome_options = Options()
chrome_options.add_argument("--proxy-server=http://gate.hexproxies.com:8080")

driver = webdriver.Chrome(options=chrome_options)
driver.get("https://httpbin.org/ip")
print(driver.page_source)
driver.quit()
Authenticated Proxy with Selenium Wire
The standard Chrome `--proxy-server` flag does not support authentication. Use `selenium-wire` for authenticated proxies:
pip install selenium-wire

from seleniumwire import webdriver

sw_options = {
    "proxy": {
        "http": "http://YOUR_USERNAME:YOUR_PASSWORD@gate.hexproxies.com:8080",
        "https": "http://YOUR_USERNAME:YOUR_PASSWORD@gate.hexproxies.com:8080",
        "no_proxy": "localhost,127.0.0.1",
    }
}

driver = webdriver.Chrome(seleniumwire_options=sw_options)
driver.get("https://httpbin.org/ip")
print(driver.page_source)
driver.quit()
Firefox with Proxy
from selenium import webdriver
from selenium.webdriver.firefox.options import Options

profile = webdriver.FirefoxProfile()
profile.set_preference("network.proxy.type", 1)
profile.set_preference("network.proxy.http", "gate.hexproxies.com")
profile.set_preference("network.proxy.http_port", 8080)
profile.set_preference("network.proxy.ssl", "gate.hexproxies.com")
profile.set_preference("network.proxy.ssl_port", 8080)
profile.set_preference("network.proxy.socks_remote_dns", True)

options = Options()
options.profile = profile

driver = webdriver.Firefox(options=options)
driver.get("https://httpbin.org/ip")
print(driver.page_source)
driver.quit()
Headless Browser with Proxy
from seleniumwire import webdriver
from selenium.webdriver.chrome.options import Options

chrome_options = Options()
chrome_options.add_argument("--headless=new")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")

sw_options = {
    "proxy": {
        "http": "http://YOUR_USERNAME:YOUR_PASSWORD@gate.hexproxies.com:8080",
        "https": "http://YOUR_USERNAME:YOUR_PASSWORD@gate.hexproxies.com:8080",
    }
}

driver = webdriver.Chrome(options=chrome_options, seleniumwire_options=sw_options)
driver.get("https://httpbin.org/ip")
ip_address = driver.find_element("tag name", "body").text
print(f"Proxy IP: {ip_address}")
driver.quit()
Aiohttp (Async Proxy)
For high-concurrency scraping, `aiohttp` with async/await provides superior throughput.
Basic Async Proxy
import asyncio

import aiohttp


async def fetch(url):
    proxy = "http://YOUR_USERNAME:YOUR_PASSWORD@gate.hexproxies.com:8080"

    async with aiohttp.ClientSession() as session:
        async with session.get(url, proxy=proxy, timeout=aiohttp.ClientTimeout(total=30)) as response:
            data = await response.json()
            return data


result = asyncio.run(fetch("https://httpbin.org/ip"))
print(result)
Concurrent Rotating Proxy Requests
import asyncio
import uuid

import aiohttp


async def fetch_with_rotation(session, url, username, password):
    session_id = uuid.uuid4().hex[:8]
    proxy_user = f"{username}-session-{session_id}"
    proxy = f"http://{proxy_user}:{password}@gate.hexproxies.com:8080"

    try:
        async with session.get(url, proxy=proxy, timeout=aiohttp.ClientTimeout(total=30)) as response:
            data = await response.json()
            return {"ip": data.get("origin"), "status": response.status}
    except (aiohttp.ClientError, asyncio.TimeoutError) as e:
        return {"error": str(e)}


async def main():
    urls = ["https://httpbin.org/ip"] * 20
    username = "YOUR_USERNAME"
    password = "YOUR_PASSWORD"

    connector = aiohttp.TCPConnector(limit=10)
    async with aiohttp.ClientSession(connector=connector) as session:
        tasks = [fetch_with_rotation(session, url, username, password) for url in urls]
        results = await asyncio.gather(*tasks)

    for i, result in enumerate(results):
        print(f"Request {i+1}: {result}")


asyncio.run(main())
Rate-Limited Async Requests
import asyncio
import uuid

import aiohttp


class RateLimiter:
    def __init__(self, rate):
        self.rate = rate
        self.semaphore = asyncio.Semaphore(rate)

    async def acquire(self):
        await self.semaphore.acquire()
        # Release the slot one second later, capping throughput at `rate` requests/second
        asyncio.get_event_loop().call_later(1.0, self.semaphore.release)


async def scrape_with_limits(urls, username, password, concurrency=10):
    limiter = RateLimiter(concurrency)
    connector = aiohttp.TCPConnector(limit=concurrency)
    results = []

    async with aiohttp.ClientSession(connector=connector) as session:
        async def fetch(url):
            await limiter.acquire()
            session_id = uuid.uuid4().hex[:8]
            proxy = f"http://{username}-session-{session_id}:{password}@gate.hexproxies.com:8080"
            try:
                async with session.get(url, proxy=proxy, timeout=aiohttp.ClientTimeout(total=30)) as resp:
                    return await resp.text()
            except Exception as e:
                return f"Error: {e}"

        tasks = [fetch(url) for url in urls]
        results = await asyncio.gather(*tasks)

    return results
Error Handling and Retries
Retry Decorator for Requests
import time
import uuid
from functools import wraps

import requests


def retry_with_proxy(max_retries=3, backoff_factor=1.0):
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            last_exception = None
            for attempt in range(max_retries):
                try:
                    return func(*args, **kwargs)
                except (requests.exceptions.ProxyError,
                        requests.exceptions.ConnectionError,
                        requests.exceptions.Timeout) as e:
                    last_exception = e
                    wait_time = backoff_factor * (2 ** attempt)
                    print(f"Attempt {attempt + 1} failed: {e}. Retrying in {wait_time}s...")
                    time.sleep(wait_time)
                except requests.exceptions.HTTPError as e:
                    if e.response.status_code in (429, 503):
                        last_exception = e
                        wait_time = backoff_factor * (2 ** attempt)
                        time.sleep(wait_time)
                    else:
                        raise
            raise last_exception
        return wrapper
    return decorator


@retry_with_proxy(max_retries=3)
def fetch_page(url, username, password):
    session_id = uuid.uuid4().hex[:8]
    proxies = {
        "http": f"http://{username}-session-{session_id}:{password}@gate.hexproxies.com:8080",
        "https": f"http://{username}-session-{session_id}:{password}@gate.hexproxies.com:8080",
    }
    response = requests.get(url, proxies=proxies, timeout=30)
    response.raise_for_status()
    return response.text
Environment Variable Configuration
For production deployments, avoid hardcoding credentials:
import os

PROXY_USER = os.environ["HEX_PROXY_USER"]
PROXY_PASS = os.environ["HEX_PROXY_PASS"]
PROXY_HOST = os.environ.get("HEX_PROXY_HOST", "gate.hexproxies.com")
PROXY_PORT = os.environ.get("HEX_PROXY_PORT", "8080")

proxies = {
    "http": f"http://{PROXY_USER}:{PROXY_PASS}@{PROXY_HOST}:{PROXY_PORT}",
    "https": f"http://{PROXY_USER}:{PROXY_PASS}@{PROXY_HOST}:{PROXY_PORT}",
}
Set environment variables before running your script:
export HEX_PROXY_USER="your_username"
export HEX_PROXY_PASS="your_password"
python your_script.py