Cy23
2023-01-30
From Liaoning
import requests
from bs4 import BeautifulSoup

url = 'https://www.tianqi.com/shenyang/7/'

def getHTMLtext(url):
    """Request the page and return its HTML."""
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"}
    try:
        r = requests.get(url, timeout=30, headers=headers)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        print("Request succeeded")
        return r.text
    except requests.RequestException:
        print("Request failed")
        return ""

def get_content(html):
    """Extract the useful fields and build the text to save."""
    final = ""                                 # accumulates one line per day
    bs = BeautifulSoup(html, "html.parser")    # create the BeautifulSoup object
    body = bs.body
    ul = body.find('ul', {'class': 'weaul'})   # the <ul class="weaul"> holding the forecast
    li = ul.find_all('li')                     # all the <li> day entries
    i = 0                                      # limits how many days are scraped
    for day in li:                             # iterate over each <li>
        if 0 < i < 30:                         # skip the first entry, cap at 30 days
            temp = ""                                       # one day's data
            date = day.find(attrs={'class': 'fl'}).string   # the date
            temp += date + " "
            inf = day.find_all(attrs={'class': 'weaul_z'})  # the weaul_z nodes; the first holds the weather description
            temp += inf[0].string + " "
            tem = inf[1].find_all('span')
            tem_low = tem[0].string                         # low temperature
            tem_high = tem[1].string                        # high temperature
            temp += tem_low + '~' + tem_high + '℃' + "\n"
            final += temp
        i = i + 1
    return final

if __name__ == '__main__':
    html_text = getHTMLtext(url)
    data = get_content(html_text)
    with open('weather.txt', 'w', encoding='UTF-8') as f:
        f.write(data)
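The same traversal can be written more compactly with BeautifulSoup's CSS selectors. A minimal sketch, assuming the tianqi.com markup used above (the weaul, fl and weaul_z class names, with the low/high temperatures in two <span>s) is unchanged:

from bs4 import BeautifulSoup

def get_content_css(html):
    """CSS-selector variant of get_content; same assumed tianqi.com markup."""
    soup = BeautifulSoup(html, "html.parser")
    lines = []
    for day in soup.select("ul.weaul li")[1:]:  # [1:] mirrors the i > 0 check above
        date = day.select_one(".fl").get_text(strip=True)
        zs = day.select(".weaul_z")             # [weather description, temperature block]
        weather = zs[0].get_text(strip=True)
        low, high = (s.get_text(strip=True) for s in zs[1].select("span")[:2])
        lines.append(f"{date} {weather} {low}~{high}℃")
    return "\n".join(lines)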
Ying
2023-05-04
From Shanghai
# Request the 7-day weather data for a city and save it to a local text file
# https://www.tianqi.com/shenyang/7/
import requests
from bs4 import BeautifulSoup

urlstr = "https://www.tianqi.com/shanghai/7/"
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
}

r = requests.get(urlstr, headers=header)
bs = BeautifulSoup(r.text, "html.parser")

# bs.body is the <body> of the page
# print(bs.body)
body = bs.body
ul = body.find("ul", {"class": "weaul"})
# print(ul)
li = ul.find_all("li")
# print(li)

temp = ""
for day in li:
    print(day)  # debug: inspect each <li>
    date = day.find("span", {"class": "fr"}).string
    temp += date
    temp += " "
    temp += day.find("div", {"class": "weaul_z"}).string
    temp += "\n"

print(temp)
with open("weather.txt", "w", encoding="UTF-8") as f:
    f.write(temp)
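This version only records the date and the weather description. Judging from the first snippet in this thread, the second element with class weaul_z holds two <span>s with the low and high temperatures; a hedged extension of the loop, assuming that same structure also holds on the Shanghai page:

temp = ""
for day in li:
    date = day.find("span", {"class": "fr"}).string
    weather = day.find("div", {"class": "weaul_z"}).string
    # Assumption: the second weaul_z node contains [low, high] <span>s,
    # as in the first snippet above. Verify with print(day) first.
    spans = day.find_all(attrs={"class": "weaul_z"})[1].find_all("span")
    low, high = spans[0].string, spans[1].string
    temp += f"{date} {weather} {low}~{high}℃\n"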
Matthew
2023-01-19
From Jiangsu
# Use format() to print the results
def print_data(final_list, num):
    print("{:^10}\t{:^8}\t{:^8}\t{:^8}\t{:^8}".format('Date', 'Weather', 'High', 'Low', 'Wind'))
    for i in range(num):
        final = final_list[i]
        print("{:^10}\t{:^8}\t{:^8}\t{:^8}\t{:^8}".format(final[0], final[1], final[2], final[3], final[4]))

# Test
if __name__ == '__main__':
    # weather.com.cn (China Weather Network), station 101190101
    url = "http://www.weather.com.cn/weather/101190101.shtml"
    # Scrape the site and build the list; getHTMLText and get_data
    # are defined in the companion comment below
    html = getHTMLText(url)
    final_list = get_data(html)
    print_data(final_list, 7)
    # Write the next 7 days' date, weather, high, low and wind scale to a file
    with open("天气.txt", mode='w', encoding='utf-8') as f:
        f.write(str(final_list))
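One caveat with the {:^10} centering: most terminals render CJK characters two columns wide, so columns mixing Chinese dates and weather text with ASCII will drift. A workaround sketch that pads by display width; the third-party wcwidth package and the pad_center helper are assumptions, not part of the original snippet:

from wcwidth import wcswidth  # pip install wcwidth (assumption, not in the original)

def pad_center(text, width):
    """Center text by terminal display width rather than character count."""
    gap = max(width - wcswidth(text), 0)
    left = gap // 2
    return " " * left + text + " " * (gap - left)

def print_row(row, width=10):
    print("\t".join(pad_center(str(cell), width) for cell in row))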
Matthew
2023-01-19
From Jiangsu
import requests
from bs4 import BeautifulSoup

# Fetch the HTML for a given url
def getHTMLText(url, timeout=30):
    try:
        r = requests.get(url, timeout=timeout)  # fetch the page with requests
        r.raise_for_status()                    # raise an exception on a bad HTTP status
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        return 'Request failed'

# Parse the HTML into the next 7 days' weather data (a list of rows)
def get_data(html):
    final_list = []
    soup = BeautifulSoup(html, 'html.parser')  # parse the page with BeautifulSoup
    body = soup.body
    data = body.find('div', {'id': '7d'})      # the 7-day forecast container
    ul = data.find('ul')
    lis = ul.find_all('li')
    for day in lis:
        temp_list = []
        date = day.find('h1').string           # the date
        temp_list.append(date)
        info = day.find_all('p')               # all <p> tags for this day
        temp_list.append(info[0].string)       # weather description
        # The high temperature is a <span> in the second <p>; it can be
        # missing (e.g. for today's entry at night), so guard the lookup
        if info[1].find('span') is None:
            temperature_highest = ' '
        else:
            temperature_highest = info[1].find('span').string
            temperature_highest = temperature_highest.replace('℃', ' ')
        # The low temperature is an <i> in the second <p>
        if info[1].find('i') is None:
            temperature_lowest = ' '
        else:
            temperature_lowest = info[1].find('i').string
            temperature_lowest = temperature_lowest.replace('℃', ' ')
        temp_list.append(temperature_highest)  # add the high temperature
        temp_list.append(temperature_lowest)   # add the low temperature
        wind_scale = info[2].find('i').string  # wind scale: the <i> in the third <p>
        temp_list.append(wind_scale)
        final_list.append(temp_list)           # one row per day
    return final_list
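Since get_data returns one list per day, the result also stores nicely in a structured format. A minimal sketch using the standard csv module; the file name and English column names are my own choices, mirroring print_data in the previous comment:

import csv

def save_csv(final_list, path="weather7d.csv"):
    """Write get_data()'s rows to a CSV file (the path name is an assumption)."""
    with open(path, "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["date", "weather", "high", "low", "wind"])
        writer.writerows(final_list)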