-
Notifications
You must be signed in to change notification settings - Fork 1
/
google_scraper.py
37 lines (29 loc) · 971 Bytes
/
google_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 2 18:47:12 2015
@author: keith
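Google image scraper test (Python 2): fetches up to 60 image-search hits for
a search term into in-memory buffers and prints the elapsed time in seconds.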
"""
import urllib2
import simplejson
import cStringIO
import urllib
import time
from PIL import Image
# Script state: the opener used for the API calls, the query term, the
# wall-clock start time, and the in-memory buffers holding each downloaded
# image's raw bytes.
fetcher = urllib2.build_opener()
searchTerm = 'parrot'
start = time.time()
files = []

# Request result offsets 0, 4, ..., 56 (15 requests), taking up to four hits
# from each response.
for startIndex in range(0, 60, 4):
    # Percent-encode the term so spaces, '&', '#', etc. cannot corrupt the
    # query string; a plain word such as 'parrot' is left unchanged.
    searchUrl = ("http://ajax.googleapis.com/ajax/services/search/images?v=1.0&q="
                 + urllib.quote_plus(searchTerm)
                 + "&start={:d}".format(startIndex))
    f = fetcher.open(searchUrl)
    try:
        deserialized_output = simplejson.load(f)
    finally:
        # Release the connection even if the payload is not valid JSON.
        f.close()

    # Tolerate a null/missing 'responseData' or a short 'results' list instead
    # of failing with TypeError/IndexError on a fixed range(4).
    # NOTE(review): presumably the service returns a null 'responseData' when
    # it rejects or throttles a request -- confirm against the API docs.
    responseData = deserialized_output.get('responseData') or {}
    results = responseData.get('results') or []
    if not results:
        # Nothing usable came back for this offset; stop paging.
        break

    for result in results[:4]:
        imageUrl = result['unescapedUrl']
        try:
            response = urllib.urlopen(imageUrl)
            try:
                files.append(cStringIO.StringIO(response.read()))
            finally:
                response.close()
        except IOError:
            # One unreachable image must not abort the whole run; move on to
            # the next hit.
            continue
        # Debug aid: uncomment to display each image as it is downloaded.
        # try:
        #     img = Image.open(files[-1])
        #     img.show()
        # except IOError:
        #     print "Image load failed, continuing with next image, there are plenty of fish in the sea!"

# Elapsed wall-clock time in seconds; the parenthesised form prints the same
# value under the Python 2 print statement.
print(time.time() - start)