Proxies for Price Monitoring
Price monitoring at scale requires reliable, diverse IP addresses to maintain stable collection from e-commerce sites. Through partner agreements, Hex Proxies provides access to a network of 10M+ residential IPs with automatic rotation, making it well suited for collecting pricing data across thousands of products and competitors.
Architecture
Product URLs → Scheduler → Proxy Pool (gate.hexproxies.com) → Target Sites
↓
Data Pipeline → Price Database → Alerts
Basic Price Monitor
import json
import random
import time
from datetime import datetime, timezone
from urllib.parse import quote

import requests
class PriceMonitor:
    """Collect product prices through the gate.hexproxies.com proxy gateway.

    Every request goes through one shared ``requests.Session`` that is
    configured with the proxy URL and browser-like headers.
    """

    def __init__(self, proxy_user, proxy_pass):
        """Create a session that routes HTTP and HTTPS traffic via the proxy.

        Args:
            proxy_user: Proxy account username.
            proxy_pass: Proxy account password.
        """
        # Percent-encode the credentials: a raw "@", ":" or "/" in either
        # value would otherwise change how the proxy URL is parsed.
        user = quote(str(proxy_user), safe="")
        password = quote(str(proxy_pass), safe="")
        self.proxy = f"http://{user}:{password}@gate.hexproxies.com:8080"
        self.session = requests.Session()
        self.session.proxies = {"http": self.proxy, "https": self.proxy}
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
            "Accept-Language": "en-US,en;q=0.9",
        })

    def check_price(self, url, parser_func):
        """Fetch a product page and extract the price.

        Never raises: any failure (network error, HTTP error status, or an
        exception inside ``parser_func``) is reported through ``status``.

        Args:
            url: Product page URL.
            parser_func: Callable that receives the response body as text and
                returns the extracted price.

        Returns:
            A dict with the keys ``url``, ``price`` (``None`` on failure),
            ``timestamp`` (ISO 8601 with an explicit UTC offset) and
            ``status`` (``"success"`` or ``"error: <message>"``).
        """
        try:
            resp = self.session.get(url, timeout=20)
            resp.raise_for_status()
            price = parser_func(resp.text)
        except Exception as e:
            # Deliberately broad: one failing page or parser must not abort
            # a whole monitoring pass.
            price = None
            status = f"error: {str(e)}"
        else:
            status = "success"
        return {
            "url": url,
            "price": price,
            # Timezone-aware UTC; datetime.utcnow() is deprecated and naive.
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "status": status,
        }

    def check_all(self, products, delay_range=(2, 5)):
        """Check every product once, pausing randomly after each request.

        Args:
            products: Iterable of dicts with ``url`` and ``parser`` keys.
                An optional ``threshold`` key is copied onto the result.
            delay_range: ``(min, max)`` bounds, in seconds, of the random
                pause that follows each request.

        Returns:
            A list with one ``check_price`` result dict per product.
        """
        low, high = delay_range
        results = []
        for product in products:
            result = self.check_price(product["url"], product["parser"])
            # check_alerts() reads "threshold" from each result, but
            # check_price() never sets it. Assumes the threshold is defined
            # per product - TODO confirm the product schema.
            threshold = product.get("threshold")
            if threshold is not None:
                result["threshold"] = threshold
            results.append(result)
            time.sleep(random.uniform(low, high))
        return results

    def monitor_products(self, products, interval_minutes=60):
        """Continuously monitor a list of products.

        Runs forever: each cycle checks all products, appends the results to
        the results file, prints alerts, then sleeps ``interval_minutes``.
        """
        while True:
            results = self.check_all(products)
            self.save_results(results)
            self.check_alerts(results)
            time.sleep(interval_minutes * 60)

    def save_results(self, results, path="prices.jsonl"):
        """Append results to a JSON Lines file, one JSON object per line.

        Args:
            results: Iterable of JSON-serializable result dicts.
            path: Destination file; it is opened in append mode.
        """
        with open(path, "a", encoding="utf-8") as f:
            f.writelines(json.dumps(result) + "\n" for result in results)

    def check_alerts(self, results):
        """Print an alert for every result priced below its threshold.

        Results without a price or without a threshold are skipped.
        """
        for result in results:
            price = result.get("price")
            threshold = result.get("threshold")
            if price is None or threshold is None:
                continue
            if price < threshold:
                print(f"ALERT: {result['url']} dropped to {price}")

# Scaling to Thousands of Products
import concurrent.futures
def batch_check(monitor, products, max_workers=10):
    """Check prices for many products concurrently.

    Args:
        monitor: Object exposing ``check_price(url, parser_func)`` that
            returns a result dict.
        products: Iterable of dicts with ``url`` and ``parser`` keys. An
            optional ``threshold`` key is copied onto the matching result.
        max_workers: Maximum number of worker threads.

    Returns:
        A list of result dicts in the same order as ``products``.
    """
    # NOTE(review): every worker calls the same ``monitor``. If it wraps a
    # single HTTP session, confirm that sharing it across threads is safe.
    results = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        submitted = [
            (product, executor.submit(monitor.check_price, product["url"], product["parser"]))
            for product in products
        ]
        # Collect in submission order so the output is deterministic; all
        # requests are already running, so this does not add wall-clock time.
        for product, future in submitted:
            result = future.result()
            # Carry the product's alert threshold along so that
            # threshold-based alerting can work on batch results as well.
            threshold = product.get("threshold")
            if threshold is not None:
                result["threshold"] = threshold
            results.append(result)
    return results

# Scheduling Best Practices
| Product Category | Check Frequency | Proxy Type |
|---|---|---|
| Flash sales / deals | Every 5-15 minutes | Residential rotating |
| Competitor pricing | Every 1-4 hours | Residential rotating |
| Market research | Daily | Residential rotating |
| MAP compliance | Every 30-60 minutes | ISP sticky |
Data Validation
Always validate extracted prices before storing:
def validate_price(price_str, max_price=100000):
    """Clean and validate an extracted price.

    Removes dollar signs, thousands separators and surrounding whitespace,
    then parses the remainder as a float.

    Args:
        price_str: Raw price text, e.g. ``"$1,299.99"``.
        max_price: Largest value accepted as plausible (inclusive).

    Returns:
        The price as a ``float``, or ``None`` when the input is empty, is
        not numeric, or falls outside ``0 < price <= max_price``.
    """
    if not price_str:
        return None
    cleaned = str(price_str).replace("$", "").replace(",", "").strip()
    try:
        price = float(cleaned)
    except ValueError:
        return None
    # Written as a positive range test so that NaN, which fails every
    # comparison, is rejected along with out-of-range values.
    if not 0 < price <= max_price:
        return None
    return price

# Anti-Detection for E-Commerce
- Rotate IPs per request (default behavior with Hex Proxies).
- Vary User-Agent strings across requests.
- Add random delays between requests (2-5 seconds for moderate volume).
- Respect robots.txt and site-specific rate limits.
- Monitor for captcha pages and adjust strategy if they appear frequently.