Browse Source

Make available translation instead of providing answers.

feature/epub
gabriel becker 2 years ago
parent
commit
9aff3d7d43
  1. 3
      requirements.txt
  2. 1
      src/ankimaker/commands/__init__.py
  3. 5
      src/ankimaker/commands/from_epub.py
  4. 2
      src/ankimaker/tasks/__init__.py
  5. 24
      src/ankimaker/tasks/dictionary.py
  6. 2
      src/ankimaker/tasks/epub/__init__.py
  7. 2
      src/ankimaker/tasks/epub/load_epub.py
  8. 6
      src/ankimaker/tasks/epub/process_epub.py
  9. 24
      src/ankimaker/tasks/epub_to_anki.py
  10. 0
      src/ankimaker/tasks/translation.py

3
requirements.txt

@ -6,4 +6,5 @@ bullet
nltk nltk
EbookLib EbookLib
BeautifulSoup4 BeautifulSoup4
PyMultiDictionary PyMultiDictionary
translate

1
src/ankimaker/commands/__init__.py

@ -1,3 +1,4 @@
from .base_click import cli from .base_click import cli
from .from_csv import generate_anki from .from_csv import generate_anki
from .make_config import make_csv_config from .make_config import make_csv_config
from .from_epub import process_epub

5
src/ankimaker/commands/from_epub.py

@ -1,14 +1,13 @@
import click import click
from ankimaker.commands import cli from ankimaker.commands import cli
from ankimaker.tasks import process_epub from ankimaker.tasks.epub_to_anki import process_epub
@cli.command('epub') @cli.command('epub')
@click.option('-i', '--input', 'input_file', type=click.Path(exists=True)) @click.option('-i', '--input', 'input_file', type=click.Path(exists=True))
@click.option('-o', '--output', 'output_file', type=click.Path(exists=False)) @click.option('-o', '--output', 'output_file', type=click.Path(exists=False))
@click.option('-l', '--lang', 'language', default=None, type=click.STRING) @click.option('-l', '--lang', 'language', default=None, type=click.STRING)
@click.option('-n', '--name', 'name', default=None, type=click.STRING) @click.option('-n', '--name', 'name', required=False, type=click.STRING)
def generate_anki(input_file, output_file, language, name): def generate_anki(input_file, output_file, language, name):
process_epub(input_file=input_file, output_file=output_file, language=language, deck_name=name) process_epub(input_file=input_file, output_file=output_file, language=language, deck_name=name)
raise NotImplementedError()

2
src/ankimaker/tasks/__init__.py

@ -1,3 +1,3 @@
from .basic_csv_to_anki import basic_pandas_to_anki from .basic_csv_to_anki import basic_pandas_to_anki
from .config_tasks import create_config, enhance_config from .config_tasks import create_config, enhance_config
from .epub import process_epub from . import dictionary

24
src/ankimaker/tasks/dictionary.py

@ -0,0 +1,24 @@
from multiprocessing import Pool
from itertools import repeat
from typing import Iterable, Optional
from http.client import RemoteDisconnected as HttpClientRemoteDisconnected
from PyMultiDictionary import MultiDictionary
def get_and_process_word_definition(language: str, word: str) -> Optional[str]:
    """Fetch a short definition of ``word`` from ``MultiDictionary``.

    The lookup result is indexed at position 1 (presumably the meaning
    text -- confirm against the PyMultiDictionary docs) and only the part
    before the first ``'.'`` is kept.

    Args:
        language: Forwarded to ``MultiDictionary.meaning`` as ``lang``.
        word: Forwarded to ``MultiDictionary.meaning`` as ``word``.

    Returns:
        The text before the first period, or ``None`` when element 1 of the
        lookup result has length <= 1 or the remote end disconnects.
    """
    try:
        lookup_result = MultiDictionary().meaning(lang=language, word=word)
        meaning_text = lookup_result[1]
        # A length of 0 or 1 is treated as "no usable definition".
        if len(meaning_text) <= 1:
            return None
        first_sentence = meaning_text.split('.')[0]
    except HttpClientRemoteDisconnected:
        # The server dropped the connection; report the word as undefined.
        return None
    return first_sentence
def get_word_definitions_from_dictionary(language: str, word_collection: Iterable[str]) -> Iterable[str]:
    """Look up every word of ``word_collection`` using a pool of 7 processes.

    Args:
        language: Language passed along with each word to the lookup.
        word_collection: Words to look up.

    Returns:
        The ``Pool.starmap`` result of calling
        ``get_and_process_word_definition(language, word)`` for each word.
    """
    with Pool(7) as worker_pool:
        # Pair the same language with every word: (language, word).
        lookup_arguments = zip(repeat(language), word_collection)
        return worker_pool.starmap(get_and_process_word_definition, lookup_arguments)

2
src/ankimaker/tasks/epub/__init__.py

@ -1 +1 @@
from .process_epub import process_epub from . load_epub import generate_corpus_from_epub_file

2
src/ankimaker/tasks/epub/load_epub.py

@ -71,5 +71,5 @@ def generate_corpus_from_epub_file(input_path):
w = w.lower() w = w.lower()
if w not in sw and len(w) > 1: if w not in sw and len(w) > 1:
german_corpus.append(w) german_corpus.append(w)
return epub return german_corpus

6
src/ankimaker/tasks/epub/process_epub.py

@ -1,6 +0,0 @@
from .load_epub import generate_corpus_from_epub_file
def process_epub(input_file, output_file, language, deck_name):
    """Stub: load the corpus from ``input_file``, then signal that it is unimplemented.

    ``output_file``, ``language`` and ``deck_name`` are accepted but not used.

    Raises:
        NotImplementedError: Always, once the corpus has been generated.
    """
    # The corpus is generated but not used further by this stub.
    generate_corpus_from_epub_file(input_file)
    raise NotImplementedError()

24
src/ankimaker/tasks/epub_to_anki.py

@ -0,0 +1,24 @@
from ankimaker import generator
from ankimaker.tasks import epub
from ankimaker.tasks import dictionary
def create_collection_and_filter_out_on_empty_definitions(words_from_epub, definitions):
    """Pair each word with its definition, dropping pairs whose definition is ``None``.

    Args:
        words_from_epub: Words, positionally aligned with ``definitions``.
        definitions: One definition (or ``None``) per word.

    Returns:
        A list of ``(word, definition)`` tuples in input order. Pairing stops
        at the shorter of the two inputs.
    """
    kept_pairs = []
    for word, meaning in zip(words_from_epub, definitions):
        # Only ``None`` marks a missing definition; empty strings are kept.
        if meaning is None:
            continue
        kept_pairs.append((word, meaning))
    return kept_pairs
def process_epub(input_file, output_file, language, deck_name):
    """Build a deck from the words of an epub and their dictionary definitions.

    Words are read with ``epub.generate_corpus_from_epub_file``, looked up
    with ``dictionary.get_word_definitions_from_dictionary``, and words
    without a definition are dropped. The remaining pairs are turned into
    cards by ``generator.QuestionAnswerGenerator`` and added to a new deck,
    which is then saved.

    Args:
        input_file: Passed to ``epub.generate_corpus_from_epub_file``.
        output_file: Passed to ``generator.deck.save_deck``.
        language: Passed to the dictionary lookup.
        deck_name: Passed to ``generator.deck.create_deck``.

    Raises:
        ValueError: If no word has a definition, so there is nothing to
            put in the deck.
    """
    words_from_epub = epub.generate_corpus_from_epub_file(input_file)
    definitions = dictionary.get_word_definitions_from_dictionary(language, words_from_epub)
    collection = create_collection_and_filter_out_on_empty_definitions(words_from_epub, definitions)
    if not collection:
        # zip(*[]) yields nothing, so the two-target unpacking below would
        # fail with an opaque "not enough values to unpack" ValueError.
        # Raise the same exception type with an actionable message instead.
        raise ValueError(
            f"No definitions were found for any word in {input_file!r}; no deck was written."
        )

    generator_engine = generator.QuestionAnswerGenerator()
    deck = generator.deck.create_deck(deck_name)
    # Split the (word, definition) pairs back into two parallel lists.
    defined_words, kept_definitions = map(list, zip(*collection))
    cards = generator_engine.get_cards(defined_words, kept_definitions)
    for card in cards:
        deck.add_note(card)
    generator.deck.save_deck(deck, output_file)

0
src/ankimaker/tasks/translation.py

Loading…
Cancel
Save