Contents

Redis代理IP池

Contents

:::tip
Redis搭建爬虫代理IP池
:::

# -*- coding: utf-8 -*-
"""
@file: proxy.py
@desc: 爬虫IP代理池
@Author: Chenzq
@Wechat: 15690833097
@contact: czq181020@gmail.com
"""

import json
import requests
import redis
import datetime, time

from taobao_sale.settings import (REDIS_HOST, REDIS_PORT, REDIS_PASSWORD, PROXY_POOL_COUNT, PROXY_PULL_COUNT, PROXY_URL)


class RedisClient(object):
    """Crawler proxy pool stored in Redis.

    Each proxy is written as its own Redis key (``http://ip:port``) with a TTL
    derived from the ``expire_time`` reported by the proxy provider, so Redis
    evicts expired proxies on its own.  The pool size is measured as the number
    of keys in the selected database.

    NOTE(review): this presumes the Redis database holds nothing but proxy
    keys -- any unrelated key would be counted and could be returned by
    ``get``.  Confirm against the deployment.
    """

    # Seconds shaved off every proxy's remaining lifetime so a proxy is
    # dropped from the pool shortly before the provider retires it.
    _EXPIRE_MARGIN = 30
    # Upper bound (seconds) for the provider HTTP call; without it a stalled
    # connection would block the caller forever.
    _REQUEST_TIMEOUT = 10

    def __init__(self, host=REDIS_HOST, port=REDIS_PORT):
        """Create the Redis connection handle.

        :param host: Redis host, defaults to ``REDIS_HOST`` from settings.
        :param port: Redis port, defaults to ``REDIS_PORT`` from settings.

        The password is only sent when ``REDIS_PASSWORD`` is set.
        """
        options = {"host": host, "port": port, "db": 0}
        if REDIS_PASSWORD:
            options["password"] = REDIS_PASSWORD
        self._db = redis.StrictRedis(**options)

    def get(self):
        """Return one random proxy URL from the pool.

        The pool is refilled through ``put`` first when it holds fewer than
        ``PROXY_POOL_COUNT`` entries.  The remaining lifetime of a proxy can
        be inspected with ``tttl``.

        :return: proxy URL as ``str``, or ``None`` when the pool is still
            empty after the refill attempt.
        """
        # DBSIZE is O(1); KEYS * walks (and transfers) the whole keyspace.
        if self._db.dbsize() < PROXY_POOL_COUNT:
            self.put()
        key = self._db.randomkey()
        if key is None:
            # Empty database: decoding None would raise a TypeError.
            return None
        if isinstance(key, bytes):
            key = key.decode("utf-8")
        return key

    def put(self):
        """Fetch ``PROXY_PULL_COUNT`` proxies from ``PROXY_URL`` and store them.

        Every proxy is stored with a TTL equal to its remaining lifetime minus
        ``_EXPIRE_MARGIN`` seconds.  Entries that are incomplete, carry an
        unparsable ``expire_time`` or are already (almost) expired are
        skipped, because SETEX rejects a non-positive expiry.

        Network errors raised by ``requests`` (including timeouts) propagate
        to the caller.
        """
        result = requests.get(
            url=PROXY_URL % (str(PROXY_PULL_COUNT)),
            timeout=self._REQUEST_TIMEOUT,
        )
        try:
            rst = json.loads(result.text)
        except ValueError:
            # Provider answered with something that is not JSON.
            print('调用代理失败:', result.text)
            return

        datas = rst.get("data")
        if not datas:
            print('调用代理失败:', rst.get("msg"))
            return

        now = int(time.time())
        for data in datas:
            ip, port = data.get('ip'), data.get('port')
            if ip is None or port is None:
                continue
            try:
                # NOTE(review): expire_time is interpreted as local time, as
                # in the original implementation -- confirm with the provider.
                expire_at = int(time.mktime(
                    time.strptime(data.get("expire_time"), "%Y-%m-%d %H:%M:%S")
                ))
            except (TypeError, ValueError, OverflowError):
                # Missing or malformed expiry: lifetime unknown, do not store.
                continue
            ttl = expire_at - now - self._EXPIRE_MARGIN
            if ttl <= 0:
                continue
            proxie = "http://" + str(ip) + ':' + str(port)
            # Keyword arguments keep the call independent of the positional
            # order of ``setex``.
            self._db.setex(name=proxie, value=1, time=ttl)
        print('add proxies successful')

    def dele(self, proxie):
        """Remove ``proxie`` from the pool and return a replacement.

        :param proxie: proxy URL (Redis key) to delete.
        :return: another proxy from ``get`` (``None`` if the pool is empty).
        """
        deleted = self._db.delete(proxie)
        if deleted:
            print(deleted, proxie)
        return self.get()

    def queue_len(self):
        """Return the number of proxies currently in the pool.

        Proxies are stored as individual keys by ``put``, so the size is the
        key count of the database rather than the cardinality of a
        ``"proxies"`` set, which this class never writes.
        """
        return self._db.dbsize()

    def flush(self):
        """Remove every key from the Redis server.

        NOTE(review): ``flushall`` wipes *all* databases on the server, not
        only the one used by this pool -- confirm this is intended before
        calling it against a shared Redis instance.
        """
        self._db.flushall()

    def tttl(self, proxie):
        """Return the remaining time to live of ``proxie`` as reported by TTL.

        :param proxie: proxy URL (Redis key) to inspect.
        """
        return self._db.ttl(proxie)

if __name__ == "__main__":
    # Manual entry point: build a client from the project settings.  The
    # calls below stay disabled; uncomment them to exercise the pool by hand
    # against a running Redis server.
    conn = RedisClient()
    # proxy = conn.get()
    # print(conn.tttl(proxy))
    # conn.dele(proxy)
    # conn.flush()