1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
|
import time import requests import re import os.path import pickle import logging import sys from datetime import datetime from bs4 import BeautifulSoup from pyinstapaper.instapaper import Instapaper, Folder, Bookmark
# Python 2 idiom: reload(sys) to regain access to sys.setdefaultencoding,
# then make UTF-8 the default for implicit str/unicode conversions.
reload(sys)
sys.setdefaultencoding('utf8')

# Instapaper API consumer credentials and account login.
# NOTE(review): these look like placeholders committed in source; real
# values should probably come from the environment or a config file.
INSTAPAPER_KEY = '************************'
INSTAPAPER_SECRET = '*********************'
INSTAPAPER_LOGIN = '[email protected]'
INSTAPAPER_PASSWORD = 'password'

# Parallel tables: novel_list[i] is the display title of the novel whose
# path segment on the site is novel_url[i].
novel_list = ["苏厨", "王老实的幸福生活", "大魔王又出手了"]
novel_url = ['392_392855', '7_7669', '431_431648']

# Shared, logged-in Instapaper client used by the rest of the script.
instapaper = Instapaper(INSTAPAPER_KEY, INSTAPAPER_SECRET)
instapaper.login(INSTAPAPER_LOGIN, INSTAPAPER_PASSWORD)
def fetch_novel(novel_list, novel_url):
    """Bookmark newly listed chapters of each novel in Instapaper.

    For every novel, the chapter index page on www.xinxs.la is downloaded
    and scanned for chapter links.  Each chapter whose mobile-site URL was
    not recorded on the previous run is added as a bookmark through the
    module-level ``instapaper`` client.  The chapter URLs found on this run
    are then pickled to ``url.pkl`` in the current working directory,
    replacing the previous history.

    Args:
        novel_list: Novel titles; each is used as the prefix of the
            bookmark title (``<novel>---<chapter link text>``).
        novel_url: Site path segment of each novel (e.g. ``'7_7669'``),
            in the same order as ``novel_list``.

    Raises:
        IndexError: If ``novel_url`` has fewer entries than ``novel_list``.
    """
    history_path = 'url.pkl'
    chapter_href = re.compile(r"\d+\.html")

    # The history file is produced by this function itself.  pickle must
    # never be pointed at data from an untrusted source.
    if os.path.isfile(history_path):
        # Pickle data is bytes, so the file is opened in binary mode.
        with open(history_path, 'rb') as f:
            last_url = pickle.load(f)
    else:
        last_url = []

    url_archive = []
    for j, novel_name in enumerate(novel_list):
        # A novel without stored history (first run, or added to the list
        # since the last run) is treated as having no known chapters.
        seen = set(last_url[j]) if j < len(last_url) else set()

        url = 'https://www.xinxs.la/' + novel_url[j] + '/'
        urlm = 'https://m.xinxs.la/' + novel_url[j] + '/'

        page = requests.get(url)
        soup = BeautifulSoup(page.content, 'lxml')
        links = soup.find_all("a", href=chapter_href, style="")

        latest_url = []
        latest_title = []
        for link in links:
            href = link['href']
            # Only bare file names are kept: the href is appended directly
            # to the mobile base URL, so anything with a path is skipped.
            if "/" in href:
                continue
            latest_url.append(urlm + href)
            latest_title.append(
                novel_name + '---' + link.string.encode('utf-8'))

        for chapter_url, chapter_title in zip(latest_url, latest_title):
            if chapter_url in seen:
                continue
            data = {
                'time': time.time(),
                'progress_timestamp': 0,
                'title': chapter_title,
                'url': chapter_url,
            }
            bookmark = Bookmark(instapaper, **data)
            bookmark.add()

        # What the index page lists now becomes the history for next run.
        url_archive.append(latest_url)

    with open(history_path, 'wb') as f:
        pickle.dump(url_archive, f)
# Archive and then delete every bookmark currently in the 'unread' folder.
# NOTE(review): the original indentation was lost, so it is not certain
# whether this clean-up ran at module level (before fetching, as written
# here) or as the final step inside fetch_novel -- confirm with the author.
bookmarks = instapaper.get_bookmarks('unread')
for bookmark in bookmarks:
    bookmark.archive()
    bookmark.delete()

# Fetch the configured novels and bookmark their new chapters.
fetch_novel(novel_list, novel_url)
|