# -*- encoding: utf-8 -*-
'''
Scrape the Douban movie listings with aiohttp + asyncio
py 3.6
sublime text3
'''
import time
import aiohttp
import asyncio
from bs4 import BeautifulSoup
def now():
    """Return the current ``time.perf_counter()`` reading.

    The value is a float number of seconds; only the difference between
    two readings is meaningful, which is how this script uses it to
    report elapsed wall time.
    """
    return time.perf_counter()
async def fetchHtmlText(url):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    A new ``aiohttp.ClientSession`` is opened for this single request and
    closed again before the function returns.

    Args:
        url: Address to request.

    Returns:
        The response body as decoded by ``response.text()``.
    """
    # The header name must be "User-Agent". The previous "users-agent" key
    # was sent as an unrelated header, so the intended browser-like
    # User-Agent value never replaced the client's default one.
    headers = {'User-Agent': 'Mozilla/5.0'}
    # NOTE(review): ssl=False turns off TLS certificate verification for the
    # connection. Kept as in the original; confirm this is really intended.
    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(headers=headers,
                                     connector=connector) as session:
        async with session.get(url) as response:
            return await response.text()
async def main():
    """Download the Douban listing page and print one line per movie entry.

    Every ``<div class="item mod">`` on the page is treated as one entry.
    For each entry the link text under ``<h3>``, the text of the first
    ``<li>`` of its ``<ul>`` and the ``src`` of the image inside its first
    link are printed, in that order, on a single tab-separated line.
    """
    listing_url = "https://movie.douban.com/cinema/later/beijing/"
    page = await fetchHtmlText(listing_url)
    entries = BeautifulSoup(page, "html.parser").find_all(
        'div', class_='item mod')

    # Collect each column in full before printing anything, so a malformed
    # entry fails before any output is produced.
    posters = [entry.a.img['src'] for entry in entries]
    titles = [entry.h3.a.string for entry in entries]
    dates = [entry.ul.li.string for entry in entries]

    # chr(12288) is the full-width (ideographic) space U+3000; it is used as
    # the fill character when centring the columns.
    pad = chr(12288)
    row_template = "{0:{3}^25} \t {1:{3}^10} \t {2:{3}^}"
    for title, date, poster in zip(titles, dates, posters):
        print(row_template.format(title, date, poster, pad))
# Script driver: run main() to completion and report the elapsed wall time.
start = now()
# Create the event loop explicitly instead of relying on
# asyncio.get_event_loop() to create one implicitly: that implicit creation
# is deprecated and newer Python versions raise RuntimeError when no current
# loop exists. new_event_loop()/set_event_loop() also work on Python 3.6.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
    loop.run_until_complete(main())
    # Printed before the loop is closed so the figure covers only the run.
    print("Wall time: {}".format(now() - start))
finally:
    # Always release the loop's resources, even if main() raised.
    loop.close()
# 九龙不败 07月02日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2560169035.jpg
# 别岁 07月02日 https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2558138041.jpg
# ...
# 刀背藏身 07月19日 https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2557644589.jpg
# Wall time: 1.994356926937086
# [Finished in 3.7s]
# (stray "展开" / "expand" link text left over from the web page this script was copied from)