Compare commits
No commits in common. "9aff3d7d431594bb62e92983064bcc73c3d3d3bb" and "0c452726dbd37578ab3449ff7af9cdee77504724" have entirely different histories.
9aff3d7d43
...
0c452726db
@ -1,10 +1,8 @@
|
|||||||
click
|
click
|
||||||
genanki
|
genanki
|
||||||
pandas==1.5.2
|
pandas
|
||||||
pyyaml
|
pyyaml
|
||||||
bullet
|
bullet
|
||||||
nltk
|
nltk
|
||||||
EbookLib
|
EbookLib
|
||||||
BeautifulSoup4
|
BeautifulSoup4
|
||||||
PyMultiDictionary
|
|
||||||
translate
|
|
@ -1,4 +1,3 @@
|
|||||||
from .base_click import cli
|
from .base_click import cli
|
||||||
from .from_csv import generate_anki
|
from .from_csv import generate_anki
|
||||||
from .make_config import make_csv_config
|
from .make_config import make_csv_config
|
||||||
from .from_epub import process_epub
|
|
||||||
|
@ -1,13 +1,14 @@
|
|||||||
import click
|
import click
|
||||||
|
|
||||||
from ankimaker.commands import cli
|
from ankimaker.commands import cli
|
||||||
from ankimaker.tasks.epub_to_anki import process_epub
|
from ankimaker.tasks import process_epub
|
||||||
|
|
||||||
|
|
||||||
@cli.command('epub')
|
@cli.command('epub')
|
||||||
@click.option('-i', '--input', 'input_file', type=click.Path(exists=True))
|
@click.option('-i', '--input', 'input_file', type=click.Path(exists=True))
|
||||||
@click.option('-o', '--output', 'output_file', type=click.Path(exists=False))
|
@click.option('-o', '--output', 'output_file', type=click.Path(exists=False))
|
||||||
@click.option('-l', '--lang', 'language', default=None, type=click.STRING)
|
@click.option('-l', '--lang', 'language', default=None, type=click.STRING)
|
||||||
@click.option('-n', '--name', 'name', required=False, type=click.STRING)
|
@click.option('-n', '--name', 'name', default=None, type=click.STRING)
|
||||||
def generate_anki(input_file, output_file, language, name):
|
def generate_anki(input_file, output_file, language, name):
|
||||||
process_epub(input_file=input_file, output_file=output_file, language=language, deck_name=name)
|
process_epub(input_file=input_file, output_file=output_file, language=language, deck_name=name)
|
||||||
|
raise NotImplementedError()
|
||||||
|
@ -12,20 +12,16 @@ class AnkimakerConfig(yaml.YAMLObject):
|
|||||||
question_column = None
|
question_column = None
|
||||||
answer_column = None
|
answer_column = None
|
||||||
separators = ','
|
separators = ','
|
||||||
input_language = None,
|
|
||||||
output_language = None,
|
|
||||||
filters: List[List[FilterConfig]] = list()
|
filters: List[List[FilterConfig]] = list()
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self, separators=',', header=None, answer_column=None, question_column=None,
|
self, separators=',', header=None, answer_column=None, question_column=None,
|
||||||
filters=tuple(), input_language=None, output_language=None, *args, **karhs
|
filters=tuple(), *args, **karhs
|
||||||
):
|
):
|
||||||
self.answer_column = answer_column
|
self.answer_column = answer_column
|
||||||
self.question_column = question_column
|
self.question_column = question_column
|
||||||
self.header = header
|
self.header = header
|
||||||
self.separators = separators
|
self.separators = separators
|
||||||
self.input_language = input_language
|
|
||||||
self.output_language = output_language
|
|
||||||
self.filters = _conditionally_create_new_filters(filters)
|
self.filters = _conditionally_create_new_filters(filters)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -38,8 +34,6 @@ class AnkimakerConfig(yaml.YAMLObject):
|
|||||||
AnkimakerConfig.question_column = content.question_column
|
AnkimakerConfig.question_column = content.question_column
|
||||||
AnkimakerConfig.answer_column = content.answer_column
|
AnkimakerConfig.answer_column = content.answer_column
|
||||||
AnkimakerConfig.separators = content.separators
|
AnkimakerConfig.separators = content.separators
|
||||||
AnkimakerConfig.input_language = content.input_language
|
|
||||||
AnkimakerConfig.output_language = content.output_language
|
|
||||||
AnkimakerConfig.filters = _conditionally_create_new_filters(content.filters)
|
AnkimakerConfig.filters = _conditionally_create_new_filters(content.filters)
|
||||||
|
|
||||||
|
|
||||||
|
@ -3,5 +3,3 @@ from . import (
|
|||||||
)
|
)
|
||||||
from .card import create_note
|
from .card import create_note
|
||||||
from .model import create_model
|
from .model import create_model
|
||||||
from .translator_generator import TranslatorGenerator
|
|
||||||
from .question_answer_generator import QuestionAnswerGenerator
|
|
||||||
|
@ -1,17 +0,0 @@
|
|||||||
import genanki
|
|
||||||
from typing import Collection, List
|
|
||||||
|
|
||||||
from ankimaker import generator
|
|
||||||
|
|
||||||
|
|
||||||
class QuestionAnswerGenerator:
|
|
||||||
def __init__(self):
|
|
||||||
self.__model = generator.create_model()
|
|
||||||
|
|
||||||
def get_cards(self, questions: Collection[str], answers: Collection[str]) -> List[genanki.Model]:
|
|
||||||
assert len(questions) == len(answers)
|
|
||||||
cards = list()
|
|
||||||
for content_fields in zip(questions, answers):
|
|
||||||
card = generator.create_note(self.__model, fields=content_fields)
|
|
||||||
cards.append(card)
|
|
||||||
return cards
|
|
@ -1,28 +0,0 @@
|
|||||||
import genanki
|
|
||||||
from translate import Translator
|
|
||||||
from typing import Collection, List
|
|
||||||
|
|
||||||
from ankimaker import generator
|
|
||||||
|
|
||||||
|
|
||||||
class TranslatorGenerator:
|
|
||||||
def __init__(self, original_language, destination_language):
|
|
||||||
"""
|
|
||||||
:param original_language: Language of the inserted text, following https://en.wikipedia.org/wiki/ISO_639-1
|
|
||||||
:param destination_language: Language you want to translate to, following https://en.wikipedia.org/wiki/ISO_639-1
|
|
||||||
"""
|
|
||||||
self.__translator = Translator(from_lang=original_language, to_lang=destination_language)
|
|
||||||
self.__model = generator.model.create_model()
|
|
||||||
|
|
||||||
def get_cards(self, content_collection: Collection[str]) -> List[genanki.Model]:
|
|
||||||
cards = list()
|
|
||||||
for content in content_collection:
|
|
||||||
card = self._create_card(content)
|
|
||||||
cards.append(card)
|
|
||||||
return cards
|
|
||||||
|
|
||||||
def _create_card(self, content):
|
|
||||||
translation = self.__translator.translate(content)
|
|
||||||
fields = (content, translation)
|
|
||||||
card = generator.create_note(self.__model, fields)
|
|
||||||
return card
|
|
@ -1,3 +1,3 @@
|
|||||||
from .basic_csv_to_anki import basic_pandas_to_anki
|
from .basic_csv_to_anki import basic_pandas_to_anki
|
||||||
from .config_tasks import create_config, enhance_config
|
from .config_tasks import create_config, enhance_config
|
||||||
from . import dictionary
|
from .epub import process_epub
|
||||||
|
@ -18,19 +18,14 @@ def load_csv(path: str) -> pd.DataFrame:
|
|||||||
|
|
||||||
|
|
||||||
def add_df_to_deck(df: pd.DataFrame, deck: genanki.Deck) -> genanki.Deck:
|
def add_df_to_deck(df: pd.DataFrame, deck: genanki.Deck) -> genanki.Deck:
|
||||||
questions = df[Config.question_column].to_list()
|
model = generator.create_model()
|
||||||
if Config.answer_column is None:
|
|
||||||
generator_engine = generator.TranslatorGenerator(
|
for entry in df.to_dict('records'):
|
||||||
original_language=Config.input_language,
|
question = entry[Config.question_column]
|
||||||
destination_language=Config.output_language,
|
answer = entry[Config.answer_column]
|
||||||
)
|
content_fields = (question, answer)
|
||||||
cards = generator_engine.get_cards(questions)
|
note = generator.create_note(model, fields=content_fields)
|
||||||
else:
|
deck.add_note(note)
|
||||||
answers = df[Config.answer_column]
|
|
||||||
generator_engine = generator.QuestionAnswerGenerator()
|
|
||||||
cards = generator_engine.get_cards(questions, answers)
|
|
||||||
for card in cards:
|
|
||||||
deck.add_note(card)
|
|
||||||
return deck
|
return deck
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,24 +0,0 @@
|
|||||||
from multiprocessing import Pool
|
|
||||||
from itertools import repeat
|
|
||||||
from typing import Iterable, Optional
|
|
||||||
from http.client import RemoteDisconnected as HttpClientRemoteDisconnected
|
|
||||||
|
|
||||||
from PyMultiDictionary import MultiDictionary
|
|
||||||
|
|
||||||
|
|
||||||
def get_and_process_word_definition(language: str, word: str) -> Optional[str]:
|
|
||||||
try:
|
|
||||||
dictionary = MultiDictionary()
|
|
||||||
definition = dictionary.meaning(lang=language, word=word)
|
|
||||||
if len(definition[1]) <= 1:
|
|
||||||
return None
|
|
||||||
definition = definition[1].split('.')[0]
|
|
||||||
except HttpClientRemoteDisconnected:
|
|
||||||
return None
|
|
||||||
return definition
|
|
||||||
|
|
||||||
|
|
||||||
def get_word_definitions_from_dictionary(language: str, word_collection: Iterable[str]) -> Iterable[str]:
|
|
||||||
with Pool(7) as p:
|
|
||||||
definitions = p.starmap(get_and_process_word_definition, zip(repeat(language), word_collection))
|
|
||||||
return definitions
|
|
@ -1 +1 @@
|
|||||||
from . load_epub import generate_corpus_from_epub_file
|
from .process_epub import process_epub
|
||||||
|
@ -71,5 +71,5 @@ def generate_corpus_from_epub_file(input_path):
|
|||||||
w = w.lower()
|
w = w.lower()
|
||||||
if w not in sw and len(w) > 1:
|
if w not in sw and len(w) > 1:
|
||||||
german_corpus.append(w)
|
german_corpus.append(w)
|
||||||
return german_corpus
|
return epub
|
||||||
|
|
||||||
|
6
src/ankimaker/tasks/epub/process_epub.py
Normal file
6
src/ankimaker/tasks/epub/process_epub.py
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
from .load_epub import generate_corpus_from_epub_file
|
||||||
|
|
||||||
|
|
||||||
|
def process_epub(input_file, output_file, language, deck_name):
|
||||||
|
corpus = generate_corpus_from_epub_file(input_file)
|
||||||
|
raise NotImplementedError()
|
@ -1,24 +0,0 @@
|
|||||||
from ankimaker import generator
|
|
||||||
|
|
||||||
from ankimaker.tasks import epub
|
|
||||||
from ankimaker.tasks import dictionary
|
|
||||||
|
|
||||||
|
|
||||||
def create_collection_and_filter_out_on_empty_definitions(words_from_epub, definitions):
|
|
||||||
collection = [(words, defi) for words, defi in zip(words_from_epub, definitions) if defi is not None]
|
|
||||||
return collection
|
|
||||||
|
|
||||||
|
|
||||||
def process_epub(input_file, output_file, language, deck_name):
|
|
||||||
words_from_epub = epub.generate_corpus_from_epub_file(input_file)
|
|
||||||
definitions = dictionary.get_word_definitions_from_dictionary(language, words_from_epub)
|
|
||||||
collection = create_collection_and_filter_out_on_empty_definitions(words_from_epub, definitions)
|
|
||||||
generator_engine = generator.QuestionAnswerGenerator()
|
|
||||||
|
|
||||||
deck = generator.deck.create_deck(deck_name)
|
|
||||||
|
|
||||||
words_from_epub, definitions = map(list, zip(*collection))
|
|
||||||
cards = generator_engine.get_cards(words_from_epub, definitions)
|
|
||||||
for card in cards:
|
|
||||||
deck.add_note(card)
|
|
||||||
generator.deck.save_deck(deck, output_file)
|
|
Loading…
x
Reference in New Issue
Block a user