import requests
from fake_useragent import UserAgent        # random User-Agent strings
import urllib3
import random                               # random sampling from the proxy pool
from requests.adapters import HTTPAdapter   # per-session retries
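Both requests and fake_useragent are third-party packages (pip install requests fake-useragent); urllib3 is pulled in as a requests dependency, and random comes from the standard library.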
Plain GET request
def sendGetRequest(url, headers=None):
    s = requests.Session()
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    try:
        # caller-supplied headers; verify=False skips certificate checks
        data = s.get(url, headers=headers, verify=False)
    except Exception as e:
        print(e)
        return None
    return data
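A quick smoke test of the helper; httpbin.org is used purely as a placeholder endpoint:

# Minimal usage sketch; the URL is only an example.
resp = sendGetRequest('https://httpbin.org/get',
                      headers={'user-agent': UserAgent(verify_ssl=False).random})
if resp is not None:
    print(resp.status_code)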
GET request with a cookie
def sendGetByCookie(url, cookie):
    s = requests.Session()
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    headers = {
        # 'user-agent': random.sample(user_agent_w_list[0], 1),
        'user-agent': UserAgent(verify_ssl=False).random,
        # 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36',
        'Cookie': cookie
    }
    try:
        data = s.get(url, headers=headers, verify=False)
    except Exception as e:
        print(e)
        return None
    return data
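For example, with a made-up session cookie:

# Usage sketch; the cookie string and URL are placeholders.
resp = sendGetByCookie('https://httpbin.org/cookies', 'sessionid=abc123')
if resp is not None:
    print(resp.text)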
def sendGetByProxy(url, proxies, cookie, paramDict, proxyPool=None):
    '''
    :param url:
    :param proxies: e.g. {'http': 'http://localhost:8888', 'https': 'http://localhost:8888'}
    :param cookie: cookie string, or None
    :param paramDict: extra headers merged into the defaults
    :param proxyPool: ip -> port pool handed to cycle() on failure (see getProxy)
    :return: the response, or None on failure
    '''
    s = requests.Session()
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    headers = {
        'user-agent': UserAgent(verify_ssl=False).random,
        'Connection': 'close'   # release keep-alive connections
    }
    if cookie is not None:
        headers['Cookie'] = cookie
    if paramDict:
        headers.update(paramDict)
    # retry failed connections up to 3 times
    s.mount('http://', HTTPAdapter(max_retries=3))
    s.mount('https://', HTTPAdapter(max_retries=3))
    if proxies:
        try:
            # timeout=10; allow_redirects=False suppresses redirects
            data = s.get(url, headers=headers, proxies=proxies, verify=False,
                         allow_redirects=False, timeout=10)
            # requests.adapters.DEFAULT_RETRIES = 5   # alternative retry knob
            s.keep_alive = False   # close unnecessary connections
        except Exception as e:
            print('request failed:', url, e)
            data = cycle(s, headers, url, proxyPool)
        return data
    else:
        try:
            data = s.get(url, headers=headers, verify=False)
            s.keep_alive = False
        except Exception as e:
            print(e)
            return None
        return data
Automatically fetch a new IP and retry when the current one fails
def cycle(s, headers, url, proxyPool):
    '''
    Fetch a new proxy IP and retry whenever the current one fails.
    :param s: the requests.Session to reuse
    :param headers:
    :param url:
    :param proxyPool: ip -> port dict sampled by getProxy()
    :return: the response, or None after 5 failed attempts
    '''
    times = 0
    success = False
    data = None
    while times < 5 and not success:
        times += 1   # count this attempt
        try:
            # timeout=10; allow_redirects=False suppresses redirects
            data = s.get(url,
                         headers=headers,
                         proxies=getProxy(proxyPool),
                         verify=False,
                         allow_redirects=False,
                         timeout=10)
            # requests.adapters.DEFAULT_RETRIES = 5   # alternative retry knob
            s.keep_alive = False   # close unnecessary connections
            success = True
            if data.status_code != 200:
                print(url, data.status_code)
                success = False
        except Exception as e:
            print('request failed:', e)
    return data
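cycle is normally reached from sendGetByProxy, but it can be exercised on its own; the pool entry below is made up:

# Standalone usage sketch with a hypothetical single-entry pool.
s = requests.Session()
headers = {'user-agent': UserAgent(verify_ssl=False).random}
pool = {b'127.0.0.1': b'8888'}
resp = cycle(s, headers, 'https://httpbin.org/ip', pool)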
Fetching a proxy IP
def getProxy(proxie):
    '''Sample one ip:port pair from the pool and build a proxies dict.'''
    proxies = {}
    # random.sample needs a sequence, so materialise the keys first
    ip = random.sample(list(proxie.keys()), 1)[0]
    port = proxie[ip]
    # pool entries are bytes (e.g. read back from Redis), so decode them
    ip = ip.decode('unicode_escape')
    port = port.decode('unicode_escape')
    ip_port = ip + ':' + port
    print('picked proxy', ip_port)
    proxies['http'] = 'http://' + ip_port
    proxies['https'] = 'https://' + ip_port
    return proxies
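An end-to-end sketch tying the pieces together; the pool below is a hand-written stand-in for whatever backing store (such as a Redis hash) actually holds the proxies:

# End-to-end usage sketch; all addresses are made up.
pool = {b'127.0.0.1': b'8888', b'127.0.0.2': b'8888'}
proxies = getProxy(pool)
resp = sendGetByProxy('https://httpbin.org/ip', proxies, None, None, proxyPool=pool)
if resp is not None:
    print(resp.status_code)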