from asyncio import create_eager_task_factory
from idlelib.rpc import request_queue
from msilib.schema import tables
from nt import write
import requests
from bs4 import BeautifulSoup
# Request headers: spoof a desktop Chrome user-agent so the forum serves the
# normal HTML page instead of rejecting a bare script client.
headers = {
    'user-agent': 'Mozilla/5.0(Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/129.0.0.0 Safari/537.36'
}
# Page-number template for the forum list pages.
# FIX: original path said 'foum-425' — corrected typo to 'forum-425'.
url = 'https://bbs.itheima.com/forum-425-{}.html'
# Collected rows, one per post: [title, author, create_time, href].
data_list = []

for page in range(1, 11):  # crawl pages 1..10
    print('当前爬取第{}页'.format(page))  # progress: "currently crawling page N"
    new_url = url.format(page)
    res = requests.get(new_url, headers=headers)
    # FIX: requests.Response has no `.txt` attribute — `.text` holds the
    # decoded HTML body.
    soup = BeautifulSoup(res.text, 'html.parser')
    # FIX: summary attribute typo 'foru_425' -> 'forum_425' so it matches the
    # forum id in the URL. NOTE(review): confirm against the live page markup.
    table = soup.find('table', summary='forum_425')
    tbodys = table.find_all('tbody')
    for i, tbody in enumerate(tbodys, start=1):
        # The first two <tbody> elements are header/sticky rows — skip them.
        if i <= 2:
            continue
        title_node = tbody.find_all('a')[1]
        # FIX: Tag has no `.txt` attribute — use `.text` for the link label;
        # and `title_node('href')` would *search for <href> tags* — the link
        # target is the attribute, read it with ['href'].
        title = title_node.text
        href = title_node['href']
        print(title)
        print(href)
        author = tbody.find_all('span')[0].text       # FIX: .txt -> .text
        create_time = tbody.find_all('span')[1].text[2:]  # FIX: .txt -> .text
        print(author)  # FIX: was `print(anthor)` — NameError (typo)
        print(create_time)
        # FIX: list.append takes exactly one argument — append one row list,
        # not four positional values (was a TypeError).
        data_list.append([title, author, create_time, href])
import csv

# Persist the scraped rows to data.csv (UTF-8, newline='' per the csv docs so
# the writer controls line endings). 'w' suffices — nothing is read back, so
# the original 'w+' read/write mode was unnecessary.
with open('data.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    # FIX: writer.writerow takes a SINGLE iterable of fields, not four
    # positional strings (the original raised TypeError).
    # Header columns: title, author, create time, link.
    writer.writerow(['标题', '作者', '创建时间', '链接'])
    # writerows emits every collected row in one call.
    writer.writerows(data_list)