# Python 3.8
# Import the beautifulsoup4 library
from bs4 import BeautifulSoup
import requests
# Imported so that tasks can run asynchronously on separate threads
import threading
# URL of the Google News front page; this is the page scraping_site() requests.
google_news_url = 'https://news.google.com/'
def set_robot(article):
    """Print the link target of the first anchor inside a headline element.

    Args:
        article: A parsed element exposing ``find`` (for example a
            BeautifulSoup ``Tag``) that is expected to contain an ``<a>`` tag.

    Returns:
        None. The URL is written to standard output as `` -  <url>``.
        Elements with no anchor, or whose anchor has no ``href``, are skipped
        silently.
    """
    link = article.find('a')
    # find() returns None when the element holds no anchor; guard before
    # dereferencing so one malformed headline cannot raise AttributeError.
    url = link.get('href') if link is not None else None
    if not url:
        return
    print(' - ', url)
def scraping_site():
    """Fetch the Google News front page and process every headline on a thread.

    Requests ``google_news_url``, parses the returned HTML, and starts one
    worker thread per matching ``<h3>`` headline element, each running
    ``set_robot`` on that element. The function returns after all worker
    threads have finished. If the response status is not 200, nothing is
    parsed and the function returns immediately.

    Raises:
        requests.exceptions.RequestException: If the HTTP request fails or
            does not complete within the timeout.
    """
    # A timeout keeps the script from blocking forever on a stalled connection.
    response = requests.get(google_news_url, timeout=10)
    if response.status_code != 200:
        return

    # response.text holds the full page body; 'html.parser' selects the
    # parser bundled with the Python standard library.
    soup = BeautifulSoup(response.text, 'html.parser')
    articles = soup.find_all('h3', {'class': 'ipQwMb ekueJc RD0gLb'})

    robots = []
    for index, article in enumerate(articles):
        # target receives the callable itself; the thread invokes it with
        # args, so the work happens on the worker rather than in this loop.
        robot = threading.Thread(
            name=f'robot-{index}',
            target=set_robot,
            args=(article,),
        )
        robot.start()
        robots.append(robot)

    # Wait for every worker so all output is complete when this returns.
    for robot in robots:
        robot.join()
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    scraping_site()