# Python3 抓取本地天气和新闻, 写入 html 文件
# (Python 3: fetch local weather and news, write the result to an HTML file.)
# Setup:
#   pip install --upgrade pip
#   pip install feedparser requests beautifulsoup4 selenium
#!/usr/bin/env python3
# coding=utf-8
import json
import time
import requests
from bs4 import BeautifulSoup
import re
import socket
# Global fallback timeout (seconds) for any socket that is opened without an
# explicit timeout of its own.
socket.setdefaulttimeout(10)
# Browser-like request headers. NOTE(review): defined here but never passed to
# any requests.get() call below — presumably intended as headers=headers;
# confirm whether the sites require it before wiring it up or removing it.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0','Accept':'*/*'}
def _nmc_image(page, small):
    """Fetch one m.nmc.cn wap page and return the URL of its #imgpath image.

    page:  path suffix such as 'p-94' (280 = xian, 94 = huabei, 92 = quanguo).
    small: when True, substitute 'small' for 'medium' in the image URL.
    Raises on network errors or if the page has no element with id 'imgpath'.
    """
    req = requests.get('http://m.nmc.cn/f/wap/' + page, timeout=10)
    req.encoding = 'utf-8'
    soup = BeautifulSoup(req.text, "html.parser")
    src = soup.find_all(attrs={'id': 'imgpath'})[0]['src']
    return src.replace('medium', 'small') if small else src

def radar():
    """Build an HTML snippet of links to radar / rain images on nmc.cn.

    Returns a single string of three <a> links (RadarXA, RadarCN, Rain24),
    space-separated, with a leading space and a trailing '<br>\\n' — exactly
    the format the original hand-unrolled version produced.
    """
    pages = (
        ('p-94', 'RadarXA', True),    # 94 = huabei regional radar
        ('p-92', 'RadarCN', False),   # 92 = quanguo (nationwide) radar
        ('p-337', 'Rain24', True),    # 24-hour precipitation chart
    )
    links = ['<a href=' + _nmc_image(page, small) + '>' + label + '</a>'
             for page, label, small in pages]
    print('radar done')
    return ' ' + ' '.join(links) + '<br>\n'
def weather():
    """Return a one-line text summary of current weather and air quality.

    Queries the nmc.cn REST API for station 57036 (realtime observation and
    AQI) and formats e.g. '晴 25C Air.40 东风2m/s'.
    Raises on network errors or if the JSON schema changes.
    """
    # Response.json() replaces the manual json.loads(req.text) round-trip.
    real = requests.get('http://www.nmc.cn/f/rest/real/57036', timeout=10).json()
    air = requests.get('http://www.nmc.cn/f/rest/aqi/57036', timeout=10).json()
    s = '{0} {1}C Air.{2} {3}{4}m/s'.format(
        real['weather']['info'], real['weather']['temperature'],
        air['aqi'], real['wind']['direct'], real['wind']['speed'])
    print('weather done')
    return s
def cnbeta():
    """Return an HTML block of up to 36 cnBeta headline links.

    Each 'list' item contributes one '<a ...>title</a><br>' line.  Items whose
    anchor is missing or has no plain-string title (i.a.string is None when
    the <a> contains nested tags) are skipped instead of crashing with a
    TypeError, which the original str-concatenation did.
    """
    parts = ['<h3><a href=http://m.cnbeta.com/wap>CnBeta</a></h3>\n']
    req = requests.get('https://m.cnbeta.com/wap', timeout=20)
    req.encoding = 'utf-8'
    soup = BeautifulSoup(req.text, "html.parser")
    for item in soup.find_all(attrs={'class': 'list'}, limit=36):
        a = item.a
        if a is None or a.string is None:
            continue  # robustness: site markup changed for this entry
        # Last 16 chars of the href reproduce the original relative-link trim.
        parts.append('<a href=https://m.cnbeta.com' + a.get('href')[-16:]
                     + ' target=_blank>' + a.string + '</a><br>\n')
    print('CB done')
    return ''.join(parts)
def hsw():
    """Return an HTML block of Xi'an local-news headline links from hsw.cn.

    Skips the first <h3> (site header, as in the original range(1, 18)) and
    keeps at most 17 headlines.  Uses a slice instead of hard-coded indexing
    so a short page no longer raises IndexError, and skips headlines with a
    missing anchor or non-string title instead of raising TypeError.
    """
    parts = ['<h3>HSW NEWS XIAN</h3>\n']
    req = requests.get('http://news.hsw.cn/sx08/xaxw08/', timeout=30)
    req.encoding = 'utf-8'
    soup = BeautifulSoup(req.text, "html.parser")
    headlines = soup.find_all('h3', limit=26)
    for h in headlines[1:18]:   # same window the original indexed by hand
        if h.a is None or h.string is None:
            continue
        parts.append('<a href=' + h.a.get('href') + ' target=_blank>'
                     + h.string + '</a><br>\n')
    print('HSW done')
    return ''.join(parts)
# --- Assemble the page: static head, then each section best-effort. -------
# Each scraper runs inside its own try so that one failing site only leaves
# its error marker in the page instead of aborting the whole run.  `except
# Exception` (not a bare `except:`) so KeyboardInterrupt / SystemExit still
# propagate — the bare form silently swallowed Ctrl-C too.
sout = '<head><meta charset="utf-8"/><style type="text/css"> <!-- A { text-decoration: none; line-height:1.5;} --> </style><title>新闻Swds</title></head><body bgcolor=#eeeeee>'
sout += time.strftime(" %y-%m-%d %H:%M:%S ", time.localtime())
# (section callable, error marker) pairs; markers are byte-identical to the
# original hand-unrolled try blocks, in the same order.
for section, err_mark in ((radar, 'radar err<br>'),
                          (weather, '<br>weather err'),
                          (hsw, '<br>hsw err'),
                          (cnbeta, 'cnbeta err')):
    try:
        sout += section() + time.strftime(" %H:%M:%S ", time.localtime())
    except Exception:
        sout += err_mark
sout += time.strftime(" %H:%M:%S ", time.localtime()) + '</body>'
with open('snews.html', 'w', encoding='UTF-8') as f:
    f.write(sout)