# request
# Fetch the server response for a URL, then parse the response with bs4.
import urllib.request
from typing import List
import bs4
from bs4 import BeautifulSoup
import time
def send_request(url: str, *, delay: float = 0.3) -> bs4.BeautifulSoup:
    """Fetch *url* over HTTP and return the body parsed with ``html.parser``.

    The request is sent with a desktop Chrome ``User-Agent`` header and a
    ``Cookie`` header. After a successful fetch and parse, the function
    sleeps for *delay* seconds to reduce load on the server.

    Args:
        url: Address of the page to download.
        delay: Seconds to pause after a successful request. Values less
            than or equal to zero disable the pause. Defaults to 0.3.

    Returns:
        A ``BeautifulSoup`` document built from the response body.

    Raises:
        urllib.error.URLError: Propagated from ``urllib.request.urlopen``
            when the request fails (``HTTPError`` is a subclass).
    """
    req = urllib.request.Request(url)
    # Set the request headers.
    req.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36',
    )
    # NOTE(review): 'xxx' looks like a placeholder cookie value -- confirm
    # whether a real cookie is expected here.
    req.add_header('Cookie', 'xxx')

    # Parse inside the ``with`` block so the body is fully read before the
    # connection is closed; the context manager releases the socket even if
    # parsing raises.
    with urllib.request.urlopen(req) as response_:
        soup = BeautifulSoup(response_, "html.parser")

    # Throttle to ease the load on the server.
    if delay > 0:
        time.sleep(delay)
    return soup