抓取一些数据做分析

2020年2月7日(农历正月十四),疫情渐现拐点。脚本每小时抓取一次数据;如果某次抓取失败,不会重试,只能等到下一个整点周期再抓。

#!/usr/bin/python
# -*- coding: UTF-8 -*-

import os.path as Path
import threading
import time
import shutil
import urllib2

# State shared between successive heart_beat() runs; both stay empty until
# the first run fills them in.
#   fn      -- base name of the current day's data file
#   full_fn -- path of that file inside the working directory
fn = ''
full_fn = ''

def heart_beat():
  """Fetch the endpoint once, append the result to today's file, re-arm the timer.

  Each run:
    1. Works out today's file name (``YYYYMMDD_wuhan.dat``). When the date
       has changed since the previous run, the previous day's file is moved
       from ``/ramdisk/`` to ``/home/fa/data/`` before switching over.
    2. Downloads the response body of the endpoint. If the request fails,
       the placeholder ``"0.0.0.0"`` is recorded instead; a failed fetch is
       not retried before the next scheduled run.
    3. Appends one ``timestamp|payload`` line to today's file.
    4. Schedules itself to run again in 3600 seconds.

  Reads and updates the module-level ``fn`` and ``full_fn``. Takes no
  arguments and returns nothing.
  """
  global full_fn
  global fn

  try:
    nfn = time.strftime('%Y%m%d') + '_wuhan.dat'

    if full_fn != '' and fn != nfn:
      # The date rolled over: archive the file written during the previous day.
      print(fn)
      if Path.exists(full_fn):
        shutil.move(full_fn, '/home/fa/data/' + fn)

    if full_fn == '' or fn != nfn:
      # First run, or a new day: start writing to today's file.
      fn = nfn
      full_fn = '/ramdisk/' + fn

    url = "https://view.inews.qq.com/g2/getOnsInfo"
    payload = "0.0.0.0"  # recorded as-is when the fetch fails

    try:
      # The timeout keeps a stalled connection from blocking this timer
      # thread forever, which would silently stop all later fetches.
      res_data = urllib2.urlopen(urllib2.Request(url), timeout=30)
      try:
        payload = res_data.read()
      finally:
        res_data.close()
      print(payload)
    except Exception:
      # Best effort: keep the placeholder and still log a line for this hour.
      print("error")

    dt = time.strftime('%Y-%m-%d %H:%M:%S')
    with open(full_fn, "a+") as fo:
      fo.write(dt + '|' + payload + '\n')
  finally:
    # Re-arm even if archiving or writing raised, so that one bad run
    # (e.g. a missing directory) does not end the hourly chain for good.
    threading.Timer(3600, heart_beat).start()

# Kick off the first fetch 10 seconds after the script starts; heart_beat
# schedules every later run itself.
threading.Timer(interval=10, function=heart_beat).start()

接下来再考虑如何处理抓取到的这些数据……

参考资料:
https://news.qq.com//zt2020/page/feiyan.htm
https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5

标签: none

添加新评论