Skip to content

Commit

Permalink
@0.0.8: fixed bugs and improved loading of the language stopword files
Browse files Browse the repository at this point in the history
  • Loading branch information
tsptoni committed Dec 3, 2017
1 parent 0f2b7e5 commit a9e8655
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 19 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
'scipy',
'networkx',
],
version = '0.0.7',
version = '0.0.8',
description = 'A text summarization and keyword extraction package',
author = 'Federico Barrios, Federico Lopez, Antonio Sanchez Pineda',
author_email = '[email protected]',
Expand Down
37 changes: 21 additions & 16 deletions textrank3/preprocessing/stopwords.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# encoding: UTF-8

import json
import os.path

base_path = os.path.dirname(__file__)

english = """
all six eleven just less being indeed over both anyway detail four front already through yourselves fify
Expand Down Expand Up @@ -196,25 +199,27 @@
yhtäällä yhtäältä yhtään yhä yksi yksin yksittäin yleensä ylemmäs yli ylös ympäri älköön älä
"""

def plain_stopwords(json):
data = json.load(open(json))
def plain_stopwords(json_filename):
json_file = os.path.abspath(os.path.join(base_path, 'languages', json_filename))
data = json.load(open(json_file))
return ' '.join(data)


LANGUAGES = {
"english": 'languages/stopwords-en',
"german": 'languages/stopwords-de',
"spanish": 'languages/stopwords-es',
"french": 'languages/stopwords-fr',
"italian": 'languages/stopwords-it',
"portuguese": 'languages/stopwords-pt',
"russian": 'languages/stopwords-ru',
"danish": 'languages/stopwords-da',
"dutch": 'languages/stopwords-nl',
"finnish": 'languages/stopwords-fi',
"norwegian": 'languages/stopwords-no',
"hungarian": 'languages/stopwords-hu',
"romanian": 'languages/stopwords-ro',
"swedish": 'languages/stopwords-sv',
"english": 'stopwords-en.json',
"german": 'stopwords-de.json',
"spanish": 'stopwords-es.json',
"french": 'stopwords-fr.json',
"italian": 'stopwords-it.json',
"portuguese": 'stopwords-pt.json',
"russian": 'stopwords-ru.json',
"danish": 'stopwords-da.json',
"dutch": 'stopwords-nl.json',
"finnish": 'stopwords-fi.json',
"norwegian": 'stopwords-no.json',
"hungarian": 'stopwords-hu.json',
"romanian": 'stopwords-ro.json',
"swedish": 'stopwords-sv.json',
}


Expand Down
4 changes: 2 additions & 2 deletions textrank3/preprocessing/textcleaner.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@
def set_stemmer_language(language):
global STEMMER
if not language in LANGUAGES:
raise ValueError("Valid languages are danish, dutch, english, finnish," +
" french, german, hungarian, italian, norwegian, porter, portuguese," +
raise ValueError("Valid languages are: danish, dutch, english, finnish, " +
"french, german, hungarian, italian, norwegian, porter, portuguese, " +
"romanian, russian, spanish, swedish")
STEMMER = SnowballStemmer(language)

Expand Down

0 comments on commit a9e8655

Please sign in to comment.