Commit
Merge branch 'mod-for-making-package-with-pyinstaller'
Showing 16 changed files with 907 additions and 745 deletions.
@@ -0,0 +1,5 @@
[global]
developmentMode = false

[server]
port = 8501
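These two settings match what the new run_webui.py entry point later in this diff passes on the command line: development mode is forced off for the packaged build, and the UI is served on 8501, Streamlit's default port. Presumably the file ships as .streamlit/config.toml next to the executable, since Streamlit looks for its configuration in a .streamlit directory under the working directory or the user's home directory.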
@@ -0,0 +1,27 @@
import argparse
import sys

import tagging
import genmodel
import counttag

def main() -> None:
    parser: argparse.ArgumentParser = argparse.ArgumentParser()
    parser.add_argument('command', nargs=1, help='command to run')
    # dummy
    parser.add_argument('--dir', nargs=1, help='')
    # dummy
    parser.add_argument('--dim', nargs=1, type=int, help='')
    args: argparse.Namespace = parser.parse_args()

    if args.command[0] == 'tagging':
        tagging.main(sys.argv[2:])
    elif args.command[0] == 'genmodel':
        genmodel.main(sys.argv[2:])
    elif args.command[0] == 'counttag':
        counttag.main()
    else:
        print('Invalid command')
        sys.exit(1)

main()
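This new entry script dispatches to the existing tagging, genmodel and counttag modules based on the first positional argument. The --dir and --dim options are declared only so that argparse accepts them (hence the "dummy" comments); each subcommand re-parses its own options, because everything after the command name is forwarded untouched via sys.argv[2:]. Assuming this script is the console entry point of the packaged tool, a call such as "genmodel --dim 800" after the program name would reach genmodel.main with ['--dim', '800'].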
This file was deleted.
@@ -0,0 +1,16 @@
# -*- coding: utf-8 -*-

from typing import Dict, List

def main() -> None:
    tag_map: Dict[str, bool] = {}
    with open('tags-wd-tagger.txt', 'r', encoding='utf-8') as f:
        for line in f:
            tags: List[str] = line.strip().split(',')
            tags = tags[1:-1]
            for tag in tags:
                tag_map[tag] = True
    print(f'{len(tag_map)} unique tags found')

if __name__ == '__main__':
    main()
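A minimal sketch of the line format this script appears to assume; the path, tags and trailing field below are hypothetical, only the comma-separated layout is implied by the slicing:

# hypothetical line from tags-wd-tagger.txt: an image path, the tags,
# and one trailing field; tags[1:-1] drops the path and that last field
line = 'images/0001.png,1girl,solo,smile,0.97'
tags = line.strip().split(',')  # ['images/0001.png', '1girl', 'solo', 'smile', '0.97']
tags = tags[1:-1]               # ['1girl', 'solo', 'smile']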
@@ -1,70 +1,74 @@
from gensim import corpora
from gensim.models import LsiModel
from gensim.similarities import MatrixSimilarity
from gensim.utils import simple_preprocess
import pickle
from typing import List, Tuple
import logging

# generate corpus for gensim and index text file for search tool
def read_documents_and_gen_idx_text(file_path: str) -> List[List[str]]:
    corpus_base: List[List[str]] = []
    idx_text_fpath: str = file_path.split('.')[0] + '_lsi_idx.csv'
    with open(idx_text_fpath, 'w', encoding='utf-8') as idx_f:
        with open(file_path, 'r', encoding='utf-8') as f:
            for line in f:
                row: List[str] = line.split(",")
                # remove file path element
                row = row[1:]
                # # remove last element
                # row = row[:-1]

                # join tags with space for gensim
                tags_line: str = ' '.join(row)
                # tokens: List[str] = simple_preprocess(tags_line.strip())
                tokens: List[str] = row
                # ignore simple_preprocess failure case and short tags image
                if tokens and len(tokens) >= 3:
                    corpus_base.append(tokens)
                    idx_f.write(line)
                    idx_f.flush()
    return corpus_base

# read image file paths from file
def read_documents(filename: str) -> List[str]:
    with open(filename, 'r', encoding='utf-8') as file:
        documents: List[str] = [line.strip() for line in file.readlines()]
    return documents

def main() -> None:
    format_str = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(
        format=format_str,
        level=logging.DEBUG
    )

    processed_docs: List[List[str]] = read_documents_and_gen_idx_text('tags-wd-tagger.txt')

    # image file => doc_id
    dictionary: corpora.Dictionary = corpora.Dictionary(processed_docs)
    # remove frequent tags
    #dictionary.filter_n_most_frequent(500)

    with open('lsi_dictionary', 'wb') as f:
        pickle.dump(dictionary, f)

    corpus: List[List[Tuple[int, int]]] = [dictionary.doc2bow(doc) for doc in processed_docs]

    # gen LSI model with specified number of topics (dimensions)
    # ATTENTION: num_topics should be set to appropriate value!!!
    lsi_model: LsiModel = LsiModel(corpus, id2word=dictionary, num_topics=800)

    lsi_model.save("lsi_model")

    # make similarity index
    index: MatrixSimilarity = MatrixSimilarity(lsi_model[corpus])

    index.save("lsi_index")

if __name__ == "__main__":
    main()
import argparse
import sys

from gensim import corpora
from gensim.models import LsiModel
from gensim.similarities import MatrixSimilarity
import pickle
from typing import List, Tuple
import logging

# generate corpus for gensim and index text file for search tool
def read_documents_and_gen_idx_text(file_path: str) -> List[List[str]]:
    corpus_base: List[List[str]] = []
    idx_text_fpath: str = file_path.split('.')[0] + '_lsi_idx.csv'
    with open(idx_text_fpath, 'w', encoding='utf-8') as idx_f:
        with open(file_path, 'r', encoding='utf-8') as f:
            for line in f:
                row: List[str] = line.split(",")
                # remove file path element
                row = row[1:]

                # tokens: List[str] = simple_preprocess(tags_line.strip())
                tokens: List[str] = row
                # ignore simple_preprocess failure case and short tags image
                if tokens and len(tokens) >= 3:
                    corpus_base.append(tokens)
                    idx_f.write(line)
                    idx_f.flush()

    return corpus_base

# read image file paths from file
def read_documents(filename: str) -> List[str]:
    with open(filename, 'r', encoding='utf-8') as file:
        documents: List[str] = [line.strip() for line in file.readlines()]
    return documents

def main(arg_str: list[str]) -> None:
    format_str = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(
        format=format_str,
        level=logging.DEBUG
    )

    parser: argparse.ArgumentParser = argparse.ArgumentParser()
    parser.add_argument('--dim', nargs=1, type=int, required=True, help='number of dimensions at LSI model')
    args: argparse.Namespace = parser.parse_args(arg_str)

    processed_docs: List[List[str]] = read_documents_and_gen_idx_text('tags-wd-tagger.txt')

    # image file => doc_id
    dictionary: corpora.Dictionary = corpora.Dictionary(processed_docs)
    # remove frequent tags
    #dictionary.filter_n_most_frequent(500)

    with open('lsi_dictionary', 'wb') as f:
        pickle.dump(dictionary, f)

    corpus: List[List[Tuple[int, int]]] = [dictionary.doc2bow(doc) for doc in processed_docs]

    # gen LSI model with specified number of topics (dimensions)
    # ATTENTION: num_topics should be set to appropriate value!!!
    # lsi_model: LsiModel = LsiModel(corpus, id2word=dictionary, num_topics=800)
    lsi_model: LsiModel = LsiModel(corpus, id2word=dictionary, num_topics=args.dim[0])

    lsi_model.save("lsi_model")

    # make similarity index
    index: MatrixSimilarity = MatrixSimilarity(lsi_model[corpus])

    index.save("lsi_index")

if __name__ == "__main__":
    main(sys.argv[1:])
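The artifacts written here (lsi_dictionary, lsi_model, lsi_index and the tags-wd-tagger_lsi_idx.csv index text) are presumably what the search UI loads elsewhere in the repository. As a rough, hypothetical sketch of querying them with gensim (the example tags are made up):

import pickle
from gensim.models import LsiModel
from gensim.similarities import MatrixSimilarity

with open('lsi_dictionary', 'rb') as f:
    dictionary = pickle.load(f)
lsi_model = LsiModel.load('lsi_model')
index = MatrixSimilarity.load('lsi_index')

# hypothetical query tags; real ones come from tags-wd-tagger.txt
bow = dictionary.doc2bow(['1girl', 'smile', 'outdoors'])
sims = index[lsi_model[bow]]  # similarity against every indexed image
print(sorted(enumerate(sims), key=lambda x: -x[1])[:5])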
@@ -0,0 +1,2 @@
from PyInstaller.utils.hooks import copy_metadata
datas = copy_metadata('streamlit')
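This is a PyInstaller hook, by convention named hook-streamlit.py and placed in the hooks/ directory that the packaging batch file below passes via --additional-hooks-dir. copy_metadata bundles streamlit's dist-info so the library's own metadata lookup keeps working inside the frozen app. If Streamlit's static assets also turned out to be missing at runtime, a possible extension (not part of this commit) would be:

# assumption: collect streamlit's data files in addition to its metadata
from PyInstaller.utils.hooks import copy_metadata, collect_data_files
datas = copy_metadata('streamlit') + collect_data_files('streamlit')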
@@ -0,0 +1,12 @@
@echo off
REM Batch file to package Python scripts into Windows executables
REM Usage: packaging_webui.bat

REM Package the script into an executable without the onefile option
pyinstaller --additional-hooks-dir=./hooks --noconfirm run_webui.py --clean

REM Clean up build and spec files
REM RMDIR /S /Q build
REM RMDIR /S /Q spec

ECHO Packaging complete.
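Since --onefile is not used, PyInstaller produces a one-folder bundle, by default under dist\run_webui\. The run_webui.py entry point shown later resolves webui.py relative to sys.executable, so the Streamlit script (and presumably the config file at the top of this diff, as .streamlit\config.toml) has to be copied into that output folder next to the executable.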
@@ -0,0 +1,12 @@
@echo off
REM Batch file to package Python scripts into Windows executables
REM Usage: packaging_webui.bat

REM Package the script into an executable without the onefile option
pyinstaller --noconfirm run_webui.spec --clean

REM Clean up build and spec files
REM RMDIR /S /Q build
REM RMDIR /S /Q __pycache__

ECHO Packaging complete.
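This variant rebuilds from the run_webui.spec file generated by the first packaging run. Because a .spec file already records the analysis options, including the hook directory, most command-line flags such as --additional-hooks-dir do not need to be repeated when building from it.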
Binary file not shown.
@@ -0,0 +1,11 @@
import streamlit.web.cli as stcli

import os
import sys

def streamlit_run():
    src = os.path.dirname(sys.executable) + '/webui.py'
    sys.argv = ['streamlit', 'run', src, '--global.developmentMode=false']
    sys.exit(stcli.main())

if __name__ == "__main__":
    streamlit_run()
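stcli.main() dispatches on sys.argv, so overwriting it here is equivalent to running streamlit run webui.py --global.developmentMode=false with webui.py located next to the frozen executable. The --global.developmentMode=false flag duplicates the setting from the config file at the top of this diff, presumably so the app also starts when no config file is shipped alongside it.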