# Program written by an employee to scrape web page data from the 黑马程序员 (itheima) forum

import csv

import requests
from bs4 import BeautifulSoup

# Browser-like request headers; many sites refuse or strip the page
# when the request carries no real User-Agent
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36'
}

# URL template for the board's thread-list pages; {} is filled with the page number
url = 'https://bbs.itheima.com/forum-425-{}.html'

data_list = []
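# data_list will hold one [title, author, create_time, href] row per thread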

for page in range(1, 11):
    print('Scraping page {}'.format(page))
    new_url = url.format(page)
    res = requests.get(new_url, headers=headers)
    soup = BeautifulSoup(res.text, 'html.parser')
    # The thread list is the <table> whose summary attribute names the board
    table = soup.find('table', summary='forum_425')
    tbodys = table.find_all('tbody')

    # The first two <tbody> blocks are not real thread rows, so skip them
    for tbody in tbodys[2:]:
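        # Parse one thread row: link and title first, then author and creation time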

        # The second <a> in the row is the thread link
        title_node = tbody.find_all('a')[1]
        title = title_node.text
        href = title_node.get('href')
        print(title)
        print(href)
        author = tbody.find_all('span')[0].text
        # Slice off the two-character prefix ahead of the creation date
        create_time = tbody.find_all('span')[1].text[2:]
        print(author)
        print(create_time)
        data_list.append([title, author, create_time, href])



# Write the collected rows to a UTF-8 CSV file
with open('data.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['Title', 'Author', 'Created', 'Link'])
    for dat in data_list:
        writer.writerow(dat)
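# Note: the file is opened in 'w' mode, so each run overwrites data.csv;
# all 10 pages of results end up in the one file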