# update_firstPost.py
# Forked from jsanyam/My-Duniya.
import feedparser
from urllib2 import urlopen
from bs4 import BeautifulSoup
import psycopg2
from entity_api import entity_extract
from untitled1 import db
from untitled1 import Article
# Firstpost RSS feeds to crawl, one per site section. Kept as a set, so the
# order in which feeds are visited is unspecified.
rss = {
    'http://www.firstpost.com/world/feed',
    'http://www.firstpost.com/economy/feed',
    'http://www.firstpost.com/living/feed',
    'http://www.firstpost.com/sports/feed',
    'http://www.firstpost.com/india/feed',
    'http://www.firstpost.com/politics/feed',
    'http://www.firstpost.com/business/feed',
    'http://www.firstpost.com/investing/feed',
    'http://www.firstpost.com/bollywood/feed',
    'http://www.firstpost.com/tech/feed',
    'http://www.firstpost.com/travel/feed',
}
def _meta_content(soup, prop):
    """Return the ``content`` attribute of the ``<meta property=prop>`` tag.

    Args:
        soup: parsed article page (a BeautifulSoup tree).
        prop: value of the ``property`` attribute to look up, e.g.
            ``"og:image"``.

    Raises:
        ValueError: if the page has no such tag, or the tag carries no
            ``content`` attribute. Raising a descriptive error here makes the
            per-post log line say what was missing instead of reporting an
            opaque failure on ``None``.
    """
    tag = soup.find("meta", attrs={"property": prop})
    content = tag.get("content") if tag is not None else None
    if content is None:
        raise ValueError("no content for meta property %r" % prop)
    return content


# Crawl every feed in `rss`: for each entry, fetch the linked article page,
# scrape its metadata and body text, and store it as an Article unless an
# article with the same title already exists. Failures are handled per post so
# that one bad page does not stop the crawl.
category = "Firstpost"
for key in rss:
    d = feedparser.parse(key)
    for post in d.entries:
        try:
            html = urlopen(post.link)
            try:
                # BeautifulSoup reads the whole response while building the
                # tree, so the connection can be released immediately after.
                bsObj = BeautifulSoup(html, "html.parser")
            finally:
                html.close()

            title = post.title
            image = _meta_content(bsObj, "og:image")
            description = _meta_content(bsObj, "og:description")
            pubdate = _meta_content(bsObj, "article:published_time")

            story_div = bsObj.find("div", attrs={"class": "fullCont1"})
            if story_div is None:
                raise ValueError(
                    "no fullCont1 story container in %r" % post.link)
            full_story = story_div.get_text()

            # Titles act as the de-duplication key: skip already stored posts.
            if not db.session.query(Article).filter(
                    Article.title == title).count():
                article_a = Article(
                    title=title,
                    full_story=full_story,
                    image=image,
                    category=category,
                    description=description,
                    pubdate=pubdate,
                )
                db.session.add(article_a)
                db.session.commit()
                print(article_a.id)
                entity_extract(article_a.id, full_story, 1)
        except psycopg2.IntegrityError:
            print("Caught")
            db.session.rollback()
        except Exception as e:
            print(e)
            # NOTE(review): if `db` is a Flask-SQLAlchemy handle (as the
            # session API suggests), driver errors arrive wrapped in
            # SQLAlchemy exception types and land here rather than in the
            # psycopg2 handler above - confirm. Rolling back here as well keeps
            # a failed flush/commit from leaving the session unusable for
            # every following post.
            db.session.rollback()