import base64
import sys
PY3 = sys.version_info[0] >= 3
def base64ify(bytes_or_str):
    # On Python 3 a str must be encoded to bytes before base64-encoding; bytes pass through unchanged
    input_bytes = bytes_or_str.encode('utf8') if PY3 and isinstance(bytes_or_str, str) else bytes_or_str
    output_bytes = base64.urlsafe_b64encode(input_bytes)
    # Return a native str on Python 3, bytes on Python 2
    return output_bytes.decode('ascii') if PY3 else output_bytes
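For reference, this helper produces exactly the value used in the Proxy-Authorization header further down in the middleware; a quick usage sketch with the placeholder credentials from this snippet:

# Placeholder credentials from the example below
auth_value = base64ify("16EDRSSX" + ":" + "214587")
# auth_value == 'MTZFRFJTU1g6MjE0NTg3'
proxy_auth_header = 'Basic ' + auth_value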
class ProxyMiddleware2(object):
    def process_request(self, request, spider):
        # Choose a proxy server
        proxyHost = "u6791.5.tn.16yun"
        proxyPort = "31111"
        # Set the proxy address and port on the request
        request.meta['proxy'] = "http://{0}:{1}".format(proxyHost, proxyPort)
        # Proxy username and password (enable/disable as needed)
        # proxyUser = "16EDRSSX"
        # proxyPass = "214587"
        # request.headers['Proxy-Authorization'] = 'Basic ' + base64ify(proxyUser + ":" + proxyPass)
        # Proxy tunnel (enable/disable as needed; requires `import random` at the top of the module)
        # tunnel = random.randint(1, 10000)
        # request.headers['Proxy-Tunnel'] = str(tunnel)
        # Set Connection to Close so that every request switches to a new IP
        request.headers['Connection'] = "Close"
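To activate this middleware in a Scrapy project, register it under DOWNLOADER_MIDDLEWARES in settings.py. A minimal sketch, assuming the class lives in a hypothetical module myproject/middlewares.py (adjust the dotted path and priority to your own project):

# settings.py (hypothetical project/module names)
DOWNLOADER_MIDDLEWARES = {
    'myproject.middlewares.ProxyMiddleware2': 543,
}

The same tunnel proxy can also be used outside Scrapy; the standalone script below sends a handful of requests through it concurrently with requests and threading.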
import base64
import logging
import threading

import requests

proxyServer = "u6791.5.tn.16yun0"
proxyUser = "16EDRSSXxx"
proxyPass = "214587"
# Pre-compute the Basic auth value for the Proxy-Authorization header
proxyAuth = "Basic " + base64.urlsafe_b64encode(bytes((proxyUser + ":" + proxyPass), "ascii")).decode("utf8")
proxies = {
    "http": "http://{0}".format(proxyServer),
    "https": "https://{0}".format(proxyServer),
}
headers = {
    "Proxy-Authorization": proxyAuth,
}
LOCK = threading.Lock()
# Log the outcome of every request to requests.log
logging.basicConfig(filename='requests.log', level=logging.DEBUG)
def fetch_url(url):
    try:
        response = requests.get(url, proxies=proxies, headers=headers)
        # Hold the lock only around the shared logging/printing
        with LOCK:
            logging.debug(f'{url} Status Code: {response.status_code}')
            print(threading.current_thread().name, response.status_code, url)
    except Exception as e:
        with LOCK:
            logging.error(f'{url} Error: {e}')
if __name__ == '__main__':
    urls = ["http://www.example.com", "http://www.google.com", "http://www.github.com"]
    threads = []
    for url in urls:
        t = threading.Thread(target=fetch_url, args=(url,))
        threads.append(t)
        t.start()
    for t in threads:
        t.join()
    logging.debug('All URL requests finished')
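An equivalent driver for the same fetch_url worker is concurrent.futures.ThreadPoolExecutor from the standard library, which takes care of starting and joining the threads; a minimal sketch reusing the urls list above:

# Alternative: drive fetch_url with a thread pool instead of manual threads
from concurrent.futures import ThreadPoolExecutor

def run_with_pool(urls, max_workers=3):
    # map() blocks until every fetch_url call has finished
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        list(executor.map(fetch_url, urls))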