Redis代理IP池
Contents
:::tip
Redis搭建爬虫代理IP池
:::
# -*- coding: utf-8 -*-
"""
@file: proxy.py
@desc: 爬虫IP代理池
@Author: Chenzq
@Wechat: 15690833097
@contact: czq181020@gmail.com
"""
import json
import requests
import redis
import datetime, time
from taobao_sale.settings import (REDIS_HOST, REDIS_PORT, REDIS_PASSWORD, PROXY_POOL_COUNT, PROXY_PULL_COUNT, PROXY_URL)
class RedisClient(object):
    """Redis-backed pool of crawler proxies.

    Each proxy is stored as its own Redis key of the form ``http://ip:port``
    with a TTL derived from the expiry time reported by the proxy provider,
    so Redis itself drops proxies once their TTL runs out.
    """

    # Seconds to wait for the proxy provider before giving up.
    REQUEST_TIMEOUT = 10
    # Proxies are retired this many seconds before the provider's expiry time.
    EXPIRE_MARGIN = 30
    # Timestamp layout used to parse the provider's ``expire_time`` field.
    TIME_FORMAT = "%Y-%m-%d %H:%M:%S"

    def __init__(self, host=REDIS_HOST, port=REDIS_PORT):
        """Connect to database 0 of the Redis server at ``host``:``port``.

        ``REDIS_PASSWORD`` is used for authentication when it is set.
        """
        self._db = redis.StrictRedis(
            host=host,
            port=port,
            db=0,
            password=REDIS_PASSWORD or None,
        )

    def get(self):
        """Return a random proxy URL from the pool, refilling it when low.

        When fewer than ``PROXY_POOL_COUNT`` keys are stored, ``put()`` is
        called first. Use ``tttl()`` to inspect a proxy's remaining lifetime.

        Returns:
            The proxy as ``str`` (``http://ip:port``), or ``None`` when the
            pool is still empty after the refill attempt.
        """
        # DBSIZE counts keys directly; KEYS would fetch every key just to
        # measure the length of the result.
        if self._db.dbsize() < PROXY_POOL_COUNT:
            self.put()
        key = self._db.randomkey()
        if key is None:
            # Empty database: decoding None would raise TypeError.
            return None
        return key.decode("utf-8") if isinstance(key, bytes) else key

    def put(self):
        """Fetch a batch of proxies from the provider and store them.

        Requests ``PROXY_PULL_COUNT`` proxies from ``PROXY_URL``. Every entry
        of the response's ``data`` list is stored under ``http://ip:port``
        with a TTL of ``expire_time - now - EXPIRE_MARGIN`` seconds. Entries
        whose ``expire_time`` is missing or malformed, or whose TTL would not
        be positive, are skipped because Redis rejects such expirations.
        Network and JSON decoding errors propagate to the caller.
        """
        response = requests.get(
            url=PROXY_URL % (str(PROXY_PULL_COUNT)),
            timeout=self.REQUEST_TIMEOUT,
        )
        rst = json.loads(response.text)
        datas = rst.get("data")
        if not datas:
            print('调用代理失败:', rst.get("msg"))
            return

        now = int(time.time())
        for data in datas:
            try:
                # The provider's timestamp is interpreted as local time.
                expire_at = int(time.mktime(
                    time.strptime(data.get("expire_time"), self.TIME_FORMAT)
                ))
            except (TypeError, ValueError, OverflowError):
                continue
            ttl = expire_at - (now + self.EXPIRE_MARGIN)
            if ttl <= 0:
                continue
            proxie = "http://" + str(data.get('ip')) + ':' + str(data.get('port'))
            self._db.setex(name=proxie, value=1, time=ttl)
        print('add proxies successful')

    def dele(self, proxie):
        """Remove ``proxie`` from the pool and return a replacement.

        Returns:
            Whatever ``get()`` returns after the deletion.
        """
        deleted = self._db.delete(proxie)
        if deleted:
            print(deleted, proxie)
        return self.get()

    def queue_len(self):
        """Return the number of proxies currently stored.

        Proxies are individual keys, so the pool size is the key count of the
        database rather than the cardinality of a ``proxies`` set.
        """
        return self._db.dbsize()

    def flush(self):
        """Remove every stored proxy.

        WARNING: this issues FLUSHALL, which clears all databases on the
        Redis server, not only the one holding the pool.
        """
        self._db.flushall()

    def tttl(self, proxie):
        """Return the value of Redis TTL for the key ``proxie``."""
        return self._db.ttl(proxie)
if __name__ == "__main__":
    # Manual smoke test; requires the Redis server configured in settings.
    conn = RedisClient()
    # Further manual checks, enable as needed:
    #   proxie = conn.get()          # pick a random proxy
    #   print(conn.tttl(proxie))     # remaining lifetime in seconds
    #   conn.dele(proxie)            # drop it and fetch a replacement
    #   conn.flush()                 # wipe the pool