close
import requests
import re
from bs4 import BeautifulSoup


def _stripped_text(node):
    """Return the whitespace-stripped text of *node*, or None if it is missing."""
    return node.text.strip() if node is not None else None


def _parse_article(tag):
    """Extract the author, title, link and preview text from one <article> tag.

    Returns a dict with the keys 'author', 'title', 'href' and 'text', or
    None when any of those pieces cannot be found in *tag*.
    """
    heading = tag.find("h2")
    link = heading.a if heading is not None else None
    fields = {
        # NOTE(review): these class names look build-generated and will
        # presumably change when the site is redeployed -- confirm.
        'author': _stripped_text(tag.find("div", {'class': 'euk31c-2 kFrzdN'})),
        'title': _stripped_text(heading),
        'href': link.get('href') if link is not None else None,
        'text': _stripped_text(tag.find("div", class_="uj732l-0 eLjDMI")),
    }
    # One incomplete card must not abort the whole scrape, so report it as
    # "nothing parsed" and let the caller skip it.
    if any(value is None for value in fields.values()):
        return None
    return fields


def main():
    """Fetch the Dcard forum index page and print the articles found on it.

    Prints the list of parsed article dicts followed by its length.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = 'https://www.dcard.tw/f'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/71.0.3578.98 Safari/537.36'}
    # Without a timeout a stalled connection would block forever.
    resp = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page into "[] 0".
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, 'html.parser')

    # Find every <article> tag that carries role="article".
    article_tags = soup.find_all('article', {"role": "article"})
    parsed = (_parse_article(tag) for tag in article_tags)
    articles = [entry for entry in parsed if entry is not None]
    print(articles, len(articles))


if __name__ == '__main__':
    main()
文章標籤
全站熱搜
留言列表