Python Proxy Configuration Overview
Python is the most popular language for web scraping, data collection, and API automation. Every major Python HTTP library supports proxy configuration, and Hex Proxies integrates seamlessly with all of them. This guide covers the four most-used libraries: requests, Scrapy, Selenium, and aiohttp.
Requests Library
The requests library is the most straightforward way to use proxies in Python.
Basic Proxy Setup
import requests

# Route both plain-HTTP and HTTPS traffic through the same HTTP proxy gateway.
proxies = {
    "http": "http://YOUR_USERNAME:YOUR_PASSWORD@gate.hexproxies.com:8080",
    "https": "http://YOUR_USERNAME:YOUR_PASSWORD@gate.hexproxies.com:8080",
}

response = requests.get("https://httpbin.org/ip", proxies=proxies, timeout=30)
print(response.json())
SOCKS5 Proxy with Requests
Install the SOCKS support package first:
pip install requests[socks]
Then configure:
proxies = {
"http": "socks5h://YOUR_USERNAME:YOUR_PASSWORD@gate.hexproxies.com:1080",
"https": "socks5h://YOUR_USERNAME:YOUR_PASSWORD@gate.hexproxies.com:1080",
}
Use socks5h:// (with the h) to route DNS resolution through the proxy. Plain socks5:// resolves DNS locally, which can leak your real location.
Session-Based Proxy
For multiple requests with the same proxy configuration:
# A Session applies the same proxy settings and headers to every request
# made through it.
session = requests.Session()
session.proxies = {
    "http": "http://YOUR_USERNAME:YOUR_PASSWORD@gate.hexproxies.com:8080",
    "https": "http://YOUR_USERNAME:YOUR_PASSWORD@gate.hexproxies.com:8080",
}
session.headers.update({"User-Agent": "Mozilla/5.0 (compatible; HexBot/1.0)"})

response = session.get("https://httpbin.org/ip", timeout=30)
print(response.json())
Rotating Proxies with Hex Proxies
Generate unique session identifiers to rotate IPs:
import uuid
import requests


def make_request(url, country=None):
    """GET *url* through the proxy gateway using a freshly generated session ID.

    Args:
        url: Address to fetch.
        country: Optional country code; when truthy it is embedded in the
            proxy username as ``-country-<code>``.

    Returns:
        The ``requests`` response object.
    """
    # A new 8-hex-character session ID is generated on every call.
    session_id = uuid.uuid4().hex[:8]
    username = f"YOUR_USERNAME-session-{session_id}"
    if country:
        username = f"YOUR_USERNAME-country-{country}-session-{session_id}"

    proxies = {
        "http": f"http://{username}:YOUR_PASSWORD@gate.hexproxies.com:8080",
        "https": f"http://{username}:YOUR_PASSWORD@gate.hexproxies.com:8080",
    }

    response = requests.get(url, proxies=proxies, timeout=30)
    return response


# Each call gets a different IP
for i in range(5):
    resp = make_request("https://httpbin.org/ip")
    print(f"Request {i+1}: {resp.json()['origin']}")
Sticky Sessions
To maintain the same IP across multiple requests, reuse the same session identifier:
STICKY_SESSION = "my-scrape-job-001"

proxies = {
    "http": f"http://YOUR_USERNAME-session-{STICKY_SESSION}:YOUR_PASSWORD@gate.hexproxies.com:8080",
    "https": f"http://YOUR_USERNAME-session-{STICKY_SESSION}:YOUR_PASSWORD@gate.hexproxies.com:8080",
}

# All requests use the same IP
for page in range(1, 11):
    response = requests.get(f"https://example.com/page/{page}", proxies=proxies, timeout=30)
    print(f"Page {page}: {response.status_code}")
Scrapy Middleware
Scrapy is the industry-standard web scraping framework for Python. Integrate Hex Proxies through a custom downloader middleware.
Creating the Proxy Middleware
Create middlewares/proxy_middleware.py:
import uuid


class HexProxiesMiddleware:
    """Scrapy downloader middleware that sets ``request.meta["proxy"]``.

    When ``rotate`` is true, every request gets a proxy username carrying a
    freshly generated 8-hex-character session ID; otherwise the configured
    username is used unchanged.
    """

    def __init__(self, proxy_host, proxy_port, username, password, rotate=True):
        self.proxy_host = proxy_host
        self.proxy_port = proxy_port
        self.username = username
        self.password = password
        self.rotate = rotate

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware from the ``HEX_PROXY_*`` crawler settings."""
        return cls(
            proxy_host=crawler.settings.get("HEX_PROXY_HOST", "gate.hexproxies.com"),
            proxy_port=crawler.settings.get("HEX_PROXY_PORT", "8080"),
            username=crawler.settings.get("HEX_PROXY_USER"),
            password=crawler.settings.get("HEX_PROXY_PASS"),
            rotate=crawler.settings.getbool("HEX_PROXY_ROTATE", True),
        )

    def process_request(self, request, spider):
        """Attach the proxy URL to *request*; returns ``None``."""
        username = self.username
        if self.rotate:
            session_id = uuid.uuid4().hex[:8]
            username = f"{self.username}-session-{session_id}"

        proxy_url = f"http://{username}:{self.password}@{self.proxy_host}:{self.proxy_port}"
        request.meta["proxy"] = proxy_url
Scrapy Settings
Add to settings.py:
DOWNLOADER_MIDDLEWARES = {
    "myproject.middlewares.proxy_middleware.HexProxiesMiddleware": 350,
}

HEX_PROXY_HOST = "gate.hexproxies.com"
HEX_PROXY_PORT = "8080"
HEX_PROXY_USER = "YOUR_USERNAME"
HEX_PROXY_PASS = "YOUR_PASSWORD"
HEX_PROXY_ROTATE = True

# Recommended settings for proxy scraping
CONCURRENT_REQUESTS = 16
DOWNLOAD_DELAY = 0.5
RETRY_TIMES = 3
RETRY_HTTP_CODES = [403, 429, 500, 502, 503]
Country-Targeted Scraping
Modify the middleware to support per-request country targeting:
def process_request(self, request, spider):
    """Attach a proxy URL to *request*, honouring a per-request country.

    A ``proxy_country`` value in ``request.meta`` takes precedence and is
    always combined with a fresh session ID. Without it, a session ID is added
    only when ``self.rotate`` is true; otherwise the plain username is used.
    """
    username = self.username
    country = request.meta.get("proxy_country")
    session_id = uuid.uuid4().hex[:8]

    if country:
        username = f"{self.username}-country-{country}-session-{session_id}"
    elif self.rotate:
        username = f"{self.username}-session-{session_id}"

    proxy_url = f"http://{username}:{self.password}@{self.proxy_host}:{self.proxy_port}"
    request.meta["proxy"] = proxy_url
Use in your spider:
yield scrapy.Request(
url="https://example.com",
meta={"proxy_country": "us"},
)
Selenium WebDriver
Selenium automates real browsers, making proxy configuration different from pure HTTP libraries.
Chrome with Proxy
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

chrome_options = Options()
chrome_options.add_argument("--proxy-server=http://gate.hexproxies.com:8080")

driver = webdriver.Chrome(options=chrome_options)
try:
    driver.get("https://httpbin.org/ip")
    print(driver.page_source)
finally:
    # Always close the browser, even if the page load raises.
    driver.quit()
Authenticated Proxy with Selenium Wire
The standard Chrome --proxy-server flag does not support authentication. Use selenium-wire for authenticated proxies:
# Install first: pip install selenium-wire
from seleniumwire import webdriver

sw_options = {
    "proxy": {
        "http": "http://YOUR_USERNAME:YOUR_PASSWORD@gate.hexproxies.com:8080",
        "https": "http://YOUR_USERNAME:YOUR_PASSWORD@gate.hexproxies.com:8080",
        "no_proxy": "localhost,127.0.0.1",
    }
}

driver = webdriver.Chrome(seleniumwire_options=sw_options)
try:
    driver.get("https://httpbin.org/ip")
    print(driver.page_source)
finally:
    # Always close the browser, even if the page load raises.
    driver.quit()
Firefox with Proxy
from selenium import webdriver
from selenium.webdriver.firefox.options import Options

profile = webdriver.FirefoxProfile()
# network.proxy.type = 1 selects manual proxy configuration in Firefox.
profile.set_preference("network.proxy.type", 1)
profile.set_preference("network.proxy.http", "gate.hexproxies.com")
profile.set_preference("network.proxy.http_port", 8080)
profile.set_preference("network.proxy.ssl", "gate.hexproxies.com")
profile.set_preference("network.proxy.ssl_port", 8080)
profile.set_preference("network.proxy.socks_remote_dns", True)

options = Options()
options.profile = profile

driver = webdriver.Firefox(options=options)
try:
    driver.get("https://httpbin.org/ip")
    print(driver.page_source)
finally:
    # Always close the browser, even if the page load raises.
    driver.quit()
Headless Browser with Proxy
from seleniumwire import webdriver
from selenium.webdriver.chrome.options import Options

chrome_options = Options()
chrome_options.add_argument("--headless=new")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")

sw_options = {
    "proxy": {
        "http": "http://YOUR_USERNAME:YOUR_PASSWORD@gate.hexproxies.com:8080",
        "https": "http://YOUR_USERNAME:YOUR_PASSWORD@gate.hexproxies.com:8080",
    }
}

driver = webdriver.Chrome(options=chrome_options, seleniumwire_options=sw_options)
try:
    driver.get("https://httpbin.org/ip")
    ip_address = driver.find_element("tag name", "body").text
    print(f"Proxy IP: {ip_address}")
finally:
    # Always close the browser, even if the page load or lookup raises.
    driver.quit()
Aiohttp (Async Proxy)
For high-concurrency scraping, aiohttp with async/await provides superior throughput.
Basic Async Proxy
import aiohttp
import asyncio


async def fetch(url):
    """GET *url* through the proxy and return the decoded JSON body."""
    proxy = "http://YOUR_USERNAME:YOUR_PASSWORD@gate.hexproxies.com:8080"

    async with aiohttp.ClientSession() as session:
        async with session.get(
            url, proxy=proxy, timeout=aiohttp.ClientTimeout(total=30)
        ) as response:
            data = await response.json()
            return data


result = asyncio.run(fetch("https://httpbin.org/ip"))
print(result)
Concurrent Rotating Proxy Requests
import aiohttp
import asyncio
import uuid


async def fetch_with_rotation(session, url, username, password):
    """GET *url* through the proxy under a freshly generated session ID.

    Returns:
        ``{"ip": ..., "status": ...}`` on success, or ``{"error": "<message>"}``
        when the request fails with an aiohttp client error or a timeout.
    """
    session_id = uuid.uuid4().hex[:8]
    proxy_user = f"{username}-session-{session_id}"
    proxy = f"http://{proxy_user}:{password}@gate.hexproxies.com:8080"

    try:
        async with session.get(
            url, proxy=proxy, timeout=aiohttp.ClientTimeout(total=30)
        ) as response:
            data = await response.json()
            return {"ip": data.get("origin"), "status": response.status}
    except (aiohttp.ClientError, asyncio.TimeoutError) as e:
        return {"error": str(e)}
async def main():
    """Fire 20 rotating-proxy requests concurrently and print each result."""
    urls = ["https://httpbin.org/ip"] * 20
    username = "YOUR_USERNAME"
    password = "YOUR_PASSWORD"

    # The connector caps the number of simultaneously open connections at 10.
    connector = aiohttp.TCPConnector(limit=10)
    async with aiohttp.ClientSession(connector=connector) as session:
        tasks = [fetch_with_rotation(session, url, username, password) for url in urls]
        results = await asyncio.gather(*tasks)

    for i, result in enumerate(results):
        print(f"Request {i+1}: {result}")


asyncio.run(main())
Rate-Limited Async Requests
import aiohttp
import asyncio
import uuid


class RateLimiter:
    """Allow at most ``rate`` acquisitions within any one-second span.

    Each acquired slot is handed back automatically one second later, so
    callers never release it themselves.
    """

    def __init__(self, rate):
        self.rate = rate
        self.semaphore = asyncio.Semaphore(rate)

    async def acquire(self):
        """Wait for a free slot, then schedule its release one second later."""
        await self.semaphore.acquire()
        # get_running_loop() is the supported way to reach the loop from
        # inside a coroutine.
        asyncio.get_running_loop().call_later(1.0, self.semaphore.release)
async def scrape_with_limits(urls, username, password, concurrency=10):
    """Fetch every URL through rotating proxy sessions under a rate limit.

    Args:
        urls: Iterable of addresses to fetch.
        username: Proxy account name; a per-request session ID is appended.
        password: Proxy account password.
        concurrency: Used both as the limiter rate and as the connection cap.

    Returns:
        A list, in the order of *urls*, holding each response body as text or
        an ``"Error: ..."`` string for a request that failed.
    """
    limiter = RateLimiter(concurrency)
    connector = aiohttp.TCPConnector(limit=concurrency)

    async with aiohttp.ClientSession(connector=connector) as session:

        async def fetch(url):
            await limiter.acquire()
            session_id = uuid.uuid4().hex[:8]
            proxy = f"http://{username}-session-{session_id}:{password}@gate.hexproxies.com:8080"
            try:
                async with session.get(
                    url, proxy=proxy, timeout=aiohttp.ClientTimeout(total=30)
                ) as resp:
                    return await resp.text()
            except Exception as e:
                # Deliberately broad: one failed URL is reported in place and
                # does not abort the rest of the batch.
                return f"Error: {e}"

        tasks = [fetch(url) for url in urls]
        results = await asyncio.gather(*tasks)

    return results
Error Handling and Retries
Retry Decorator for Requests
import time
import requests
import uuid
from functools import wraps


def retry_with_proxy(max_retries=3, backoff_factor=1.0):
    """Decorator factory that retries a call with exponential backoff.

    Proxy, connection and timeout errors are retried, as are HTTP errors with
    status 429 or 503; any other HTTP error propagates immediately. The delay
    before retry *n* (0-based) is ``backoff_factor * 2 ** n`` seconds.

    Args:
        max_retries: Total number of attempts; must be at least 1.
        backoff_factor: Multiplier for the exponential delay, in seconds.

    Raises:
        ValueError: If ``max_retries`` is smaller than 1.
        requests.exceptions.RequestException: The last retryable error, once
            every attempt has failed.
    """
    if max_retries < 1:
        # With zero attempts there would be no exception to re-raise later.
        raise ValueError("max_retries must be at least 1")

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            last_exception = None
            for attempt in range(max_retries):
                is_last_attempt = attempt == max_retries - 1
                wait_time = backoff_factor * (2 ** attempt)
                try:
                    return func(*args, **kwargs)
                except (requests.exceptions.ProxyError,
                        requests.exceptions.ConnectionError,
                        requests.exceptions.Timeout) as e:
                    last_exception = e
                    if is_last_attempt:
                        break
                    print(f"Attempt {attempt + 1} failed: {e}. Retrying in {wait_time}s...")
                except requests.exceptions.HTTPError as e:
                    # Only rate-limit / unavailable responses are worth retrying.
                    if e.response is None or e.response.status_code not in (429, 503):
                        raise
                    last_exception = e
                    if is_last_attempt:
                        break
                # Reached only after a retryable failure with attempts left.
                time.sleep(wait_time)
            raise last_exception
        return wrapper
    return decorator
@retry_with_proxy(max_retries=3)
def fetch_page(url, username, password):
    """GET *url* through a fresh proxy session and return the body text.

    ``raise_for_status()`` turns HTTP error responses into exceptions so the
    retry decorator can act on them.
    """
    session_id = uuid.uuid4().hex[:8]
    proxies = {
        "http": f"http://{username}-session-{session_id}:{password}@gate.hexproxies.com:8080",
        "https": f"http://{username}-session-{session_id}:{password}@gate.hexproxies.com:8080",
    }
    response = requests.get(url, proxies=proxies, timeout=30)
    response.raise_for_status()
    return response.text
Environment Variable Configuration
For production deployments, avoid hardcoding credentials:
import os
from urllib.parse import quote

# Credentials are required: a missing variable raises KeyError at startup.
PROXY_USER = os.environ["HEX_PROXY_USER"]
PROXY_PASS = os.environ["HEX_PROXY_PASS"]
PROXY_HOST = os.environ.get("HEX_PROXY_HOST", "gate.hexproxies.com")
PROXY_PORT = os.environ.get("HEX_PROXY_PORT", "8080")

# Percent-encode the credentials so characters such as "@", ":" or "/" in a
# password cannot corrupt the proxy URL.
_proxy_url = (
    f"http://{quote(PROXY_USER, safe='')}:{quote(PROXY_PASS, safe='')}"
    f"@{PROXY_HOST}:{PROXY_PORT}"
)

proxies = {
    "http": _proxy_url,
    "https": _proxy_url,
}
Set environment variables before running your script:
export HEX_PROXY_USER="your_username"
export HEX_PROXY_PASS="your_password"
python your_script.py