diff --git a/requirements.txt b/requirements.txt index a7c609c..686db44 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,5 @@ bullet nltk EbookLib BeautifulSoup4 -PyMultiDictionary \ No newline at end of file +PyMultiDictionary +translate \ No newline at end of file diff --git a/src/ankimaker/commands/__init__.py b/src/ankimaker/commands/__init__.py index ef2ac72..c77eb24 100644 --- a/src/ankimaker/commands/__init__.py +++ b/src/ankimaker/commands/__init__.py @@ -1,3 +1,4 @@ from .base_click import cli from .from_csv import generate_anki from .make_config import make_csv_config +from .from_epub import process_epub diff --git a/src/ankimaker/commands/from_epub.py b/src/ankimaker/commands/from_epub.py index 90c18d7..579d65a 100644 --- a/src/ankimaker/commands/from_epub.py +++ b/src/ankimaker/commands/from_epub.py @@ -1,14 +1,13 @@ import click from ankimaker.commands import cli -from ankimaker.tasks import process_epub +from ankimaker.tasks.epub_to_anki import process_epub @cli.command('epub') @click.option('-i', '--input', 'input_file', type=click.Path(exists=True)) @click.option('-o', '--output', 'output_file', type=click.Path(exists=False)) @click.option('-l', '--lang', 'language', default=None, type=click.STRING) -@click.option('-n', '--name', 'name', default=None, type=click.STRING) +@click.option('-n', '--name', 'name', required=False, type=click.STRING) def generate_anki(input_file, output_file, language, name): process_epub(input_file=input_file, output_file=output_file, language=language, deck_name=name) - raise NotImplementedError() diff --git a/src/ankimaker/tasks/__init__.py b/src/ankimaker/tasks/__init__.py index d529eb7..0c8588a 100644 --- a/src/ankimaker/tasks/__init__.py +++ b/src/ankimaker/tasks/__init__.py @@ -1,3 +1,3 @@ from .basic_csv_to_anki import basic_pandas_to_anki from .config_tasks import create_config, enhance_config -from .epub import process_epub +from . import dictionary diff --git a/src/ankimaker/tasks/dictionary.py b/src/ankimaker/tasks/dictionary.py index e69de29..ce63463 100644 --- a/src/ankimaker/tasks/dictionary.py +++ b/src/ankimaker/tasks/dictionary.py @@ -0,0 +1,24 @@ +from multiprocessing import Pool +from itertools import repeat +from typing import Iterable, Optional +from http.client import RemoteDisconnected as HttpClientRemoteDisconnected + +from PyMultiDictionary import MultiDictionary + + +def get_and_process_word_definition(language: str, word: str) -> Optional[str]: + try: + dictionary = MultiDictionary() + definition = dictionary.meaning(lang=language, word=word) + if len(definition[1]) <= 1: + return None + definition = definition[1].split('.')[0] + except HttpClientRemoteDisconnected: + return None + return definition + + +def get_word_definitions_from_dictionary(language: str, word_collection: Iterable[str]) -> Iterable[str]: + with Pool(7) as p: + definitions = p.starmap(get_and_process_word_definition, zip(repeat(language), word_collection)) + return definitions diff --git a/src/ankimaker/tasks/epub/__init__.py b/src/ankimaker/tasks/epub/__init__.py index 7613e7f..7acd743 100644 --- a/src/ankimaker/tasks/epub/__init__.py +++ b/src/ankimaker/tasks/epub/__init__.py @@ -1 +1 @@ -from .process_epub import process_epub +from . load_epub import generate_corpus_from_epub_file diff --git a/src/ankimaker/tasks/epub/load_epub.py b/src/ankimaker/tasks/epub/load_epub.py index dfb1c12..3d47e34 100644 --- a/src/ankimaker/tasks/epub/load_epub.py +++ b/src/ankimaker/tasks/epub/load_epub.py @@ -71,5 +71,5 @@ def generate_corpus_from_epub_file(input_path): w = w.lower() if w not in sw and len(w) > 1: german_corpus.append(w) - return epub + return german_corpus diff --git a/src/ankimaker/tasks/epub/process_epub.py b/src/ankimaker/tasks/epub/process_epub.py deleted file mode 100644 index 65053de..0000000 --- a/src/ankimaker/tasks/epub/process_epub.py +++ /dev/null @@ -1,6 +0,0 @@ -from .load_epub import generate_corpus_from_epub_file - - -def process_epub(input_file, output_file, language, deck_name): - corpus = generate_corpus_from_epub_file(input_file) - raise NotImplementedError() diff --git a/src/ankimaker/tasks/epub_to_anki.py b/src/ankimaker/tasks/epub_to_anki.py new file mode 100644 index 0000000..3cadc38 --- /dev/null +++ b/src/ankimaker/tasks/epub_to_anki.py @@ -0,0 +1,24 @@ +from ankimaker import generator + +from ankimaker.tasks import epub +from ankimaker.tasks import dictionary + + +def create_collection_and_filter_out_on_empty_definitions(words_from_epub, definitions): + collection = [(words, defi) for words, defi in zip(words_from_epub, definitions) if defi is not None] + return collection + + +def process_epub(input_file, output_file, language, deck_name): + words_from_epub = epub.generate_corpus_from_epub_file(input_file) + definitions = dictionary.get_word_definitions_from_dictionary(language, words_from_epub) + collection = create_collection_and_filter_out_on_empty_definitions(words_from_epub, definitions) + generator_engine = generator.QuestionAnswerGenerator() + + deck = generator.deck.create_deck(deck_name) + + words_from_epub, definitions = map(list, zip(*collection)) + cards = generator_engine.get_cards(words_from_epub, definitions) + for card in cards: + deck.add_note(card) + generator.deck.save_deck(deck, output_file) diff --git a/src/ankimaker/tasks/translation.py b/src/ankimaker/tasks/translation.py deleted file mode 100644 index e69de29..0000000