# html下载和转换

```python
import os
import urllib
import urllib.request

import bs4
from bs4 import BeautifulSoup

# Browser-like User-Agent header sent with every download request.
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36'


def download_html(url):
    """Download the page at *url* and save it as ``data.html`` in the
    current working directory.

    Keeping a local copy makes it easy to debug BeautifulSoup parsing
    offline, e.g.::

        soup = BeautifulSoup(open("data.html", encoding="utf-8"), 'lxml')

    :param url: URL of the page to download
    :return: None
    """
    req = urllib.request.Request(url)
    req.add_header('User-Agent', _USER_AGENT)
    # To send a cookie as well: req.add_header('Cookie', '<cookie string>')

    # The context manager closes the HTTP response even if writing fails.
    with urllib.request.urlopen(req) as response:
        to_html_by_response(response)


def to_html_by_response(response, name="data.html"):
    """Write the body of *response* to a file in the current working
    directory.

    :param response: object with a ``read()`` method returning bytes,
        e.g. ``http.client.HTTPResponse``
    :param name: name of the output file (default ``data.html``)
    :return: None
    """
    # Join with the platform separator; plain string concatenation used to
    # glue the file name directly onto the directory name. Leading
    # separators are stripped so callers that compensated by passing
    # "/data.html" or "\\data.html" still write inside the working directory.
    path = os.path.join(os.getcwd(), str(name).lstrip('/\\'))
    with open(path, "wb") as fo:
        fo.write(response.read())


def bs4_local_html(filename) -> bs4.BeautifulSoup:
    """Parse a local HTML file with BeautifulSoup's ``html.parser``.

    :param filename: path of the HTML file to read
    :return: the parsed document
    """
    # Read raw bytes and pass them to BeautifulSoup undecoded.
    with open(filename, 'rb') as f:
        doc_html = f.read()
    return BeautifulSoup(doc_html, "html.parser")


if __name__ == '__main__':
    url = "https://lishi.tianqi.com/wuhan/202105.html"
    download_html(url)
```