From f1d45d43ebc0b0348e362c0b9a44bbd7c41ba425 Mon Sep 17 00:00:00 2001
From: Jacob Errington
Date: Mon, 2 Nov 2015 17:15:11 -0500
Subject: [PATCH] add jbovlaste xml export download script

The readme is also amended to use this script rather than wget, since the
introduction of the captcha on jbovlaste has broken the old way.

Closes #4
---
 download.py      | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 readme.yml       |  6 ++++--
 requirements.txt |  1 +
 3 files changed, 57 insertions(+), 2 deletions(-)
 create mode 100644 download.py

diff --git a/download.py b/download.py
new file mode 100644
index 0000000..0c6108a
--- /dev/null
+++ b/download.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+"""Download a jbovlaste XML export into vlasisku/data/jbovlaste.xml.
+
+The export is fetched through a logged-in session, because jbovlaste's
+captcha blocks plain unauthenticated downloads.
+"""
+
+import sys
+
+import requests
+
+# Replace both placeholder values with your own jbovlaste account details.
+CREDENTIALS = dict(
+    username='YOUR JBOVLASTE USERNAME',
+    password='YOUR JBOVLASTE PASSWORD',
+)
+
+# jbovlaste definitions language to download
+LANG = 'en'
+
+BASE_URL = 'http://jbovlaste.lojban.org'
+OUTPUT_PATH = 'vlasisku/data/jbovlaste.xml'
+
+
+def main():
+    """Log in to jbovlaste and save the XML export for LANG to disk.
+
+    Exits with a message if CREDENTIALS still holds the placeholder values.
+    Raises requests.HTTPError if either request returns an error status.
+    """
+    if any(v.startswith('YOUR JBOVLASTE ') for v in CREDENTIALS.values()):
+        sys.exit('Edit CREDENTIALS in download.py before running it.')
+
+    # Reuse one session so the login carries over to the export request.
+    with requests.Session() as session:
+        login = session.post(BASE_URL + '/login.html', data=CREDENTIALS)
+        login.raise_for_status()
+
+        export = session.get(
+            BASE_URL + '/export/xml-export.html',
+            params={'lang': LANG},
+        )
+        export.raise_for_status()
+
+    # Write the raw response bytes: decoding via .text relies on a guessed
+    # charset and can corrupt non-ASCII definitions when re-encoded.
+    with open(OUTPUT_PATH, 'wb') as f:
+        f.write(export.content)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/readme.yml b/readme.yml
index 9fb1565..725ef0d 100644
--- a/readme.yml
+++ b/readme.yml
@@ -13,8 +13,6 @@ Recommended for developers:
 Installation for developers: |
     git clone https://github.com/lojban/vlasisku
     cd vlasisku
-    wget 'http://jbovlaste.lojban.org/export/xml-export.html?lang=en' \
-        -O vlasisku/data/jbovlaste.xml
 
     # Isolated Python environment
     mkvirtualenv --no-site-packages vlasisku
@@ -22,6 +20,10 @@ Installation for developers: |
     # Installs to that environment
     pip install -r requirements.txt
 
+    # Modify download.py to use your jbovlaste credentials, then use it
+    # to download a jbovlaste XML dump. (It takes a while to run.)
+    ./download.py
+
     # This takes some 20 seconds the first time
     # and must be done whenever the jbovlaste export is changed
     ./manage.py runserver
diff --git a/requirements.txt b/requirements.txt
index 3e6cfbd..187dad8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,3 +6,4 @@ simplejson
 pystemmer
 Twisted
 jellyfish
+requests