From eaa82edc811a669b39743c30ca5528ffaa9443e9 Mon Sep 17 00:00:00 2001 From: gabriel becker Date: Thu, 8 Dec 2022 18:43:24 +1100 Subject: [PATCH] Create filters in configuration and implement it. --- src/ankimaker/commands/__init__.py | 13 +-- src/ankimaker/commands/base_click.py | 6 ++ src/ankimaker/commands/from_csv.py | 3 +- src/ankimaker/config/__init__.py | 1 + src/ankimaker/config/configuration.py | 24 +++-- src/ankimaker/config/filters.py | 16 ++++ src/ankimaker/config/load_config.py | 5 +- src/ankimaker/generator/__init__.py | 4 +- src/ankimaker/generator/card.py | 9 ++ src/ankimaker/generator/model.py | 20 ++++ src/ankimaker/generator/models.py | 17 ++++ src/ankimaker/tasks/basic_csv_to_anki.py | 96 ++++++++++++------- .../tasks/config_tasks/create_config.py | 22 ++++- src/ankimaker/utils/files.py | 2 +- 14 files changed, 177 insertions(+), 61 deletions(-) create mode 100644 src/ankimaker/commands/base_click.py create mode 100644 src/ankimaker/config/filters.py create mode 100644 src/ankimaker/generator/card.py create mode 100644 src/ankimaker/generator/model.py create mode 100644 src/ankimaker/generator/models.py diff --git a/src/ankimaker/commands/__init__.py b/src/ankimaker/commands/__init__.py index f105f0b..ef2ac72 100644 --- a/src/ankimaker/commands/__init__.py +++ b/src/ankimaker/commands/__init__.py @@ -1,10 +1,3 @@ -import click - - -@click.group("cli") -def cli(): - pass - - -from ..commands.from_csv import generate_anki -from ..commands.make_config import make_csv_config +from .base_click import cli +from .from_csv import generate_anki +from .make_config import make_csv_config diff --git a/src/ankimaker/commands/base_click.py b/src/ankimaker/commands/base_click.py new file mode 100644 index 0000000..b113a6f --- /dev/null +++ b/src/ankimaker/commands/base_click.py @@ -0,0 +1,6 @@ +import click + + +@click.group("cli") +def cli(): + pass diff --git a/src/ankimaker/commands/from_csv.py b/src/ankimaker/commands/from_csv.py index 10062d9..5ae101e 100644 --- a/src/ankimaker/commands/from_csv.py +++ b/src/ankimaker/commands/from_csv.py @@ -1,5 +1,6 @@ -import click import re +import click + from ankimaker.commands import cli from ankimaker.tasks import basic_pandas_to_anki diff --git a/src/ankimaker/config/__init__.py b/src/ankimaker/config/__init__.py index fccb81a..9ff70ae 100644 --- a/src/ankimaker/config/__init__.py +++ b/src/ankimaker/config/__init__.py @@ -1,2 +1,3 @@ from .load_config import load_config_file from .configuration import AnkimakerConfig as Config +from .filters import FilterConfig diff --git a/src/ankimaker/config/configuration.py b/src/ankimaker/config/configuration.py index b12a182..31e452f 100644 --- a/src/ankimaker/config/configuration.py +++ b/src/ankimaker/config/configuration.py @@ -1,5 +1,8 @@ import yaml -from typing import Iterable +from typing import List + +from .filters import FilterConfig + _empty_list = () @@ -9,22 +12,27 @@ class AnkimakerConfig(yaml.YAMLObject): question_column = None answer_column = None separators = ',' - filters: Iterable[dict] = list() + filters: List[List[FilterConfig]] = list() - def __init__( - self, header=None, answer_column=None, question_column=None, filters=_empty_list - ): + def __init__(self, header=None, answer_column=None, question_column=None, filters=tuple()): AnkimakerConfig.answer_column = answer_column AnkimakerConfig.question_column = question_column AnkimakerConfig.header = header - AnkimakerConfig.filters = filters AnkimakerConfig.AnkimakerConfig = AnkimakerConfig + AnkimakerConfig.filters = list(map(lambda x: FilterConfig, filters)) @staticmethod def loader(configuration_content): - content = configuration_content['AnkimakerConfig'] + if isinstance(configuration_content, dict): + content = configuration_content['AnkimakerConfig'] + else: + content = configuration_content AnkimakerConfig.header = content.header AnkimakerConfig.question_column = content.question_column AnkimakerConfig.answer_column = content.answer_column AnkimakerConfig.separators = content.separators - AnkimakerConfig.filters = content.filters + AnkimakerConfig.filters = [ + [FilterConfig(**x) for x in or_filter] + for or_filter in content.filters + ] + diff --git a/src/ankimaker/config/filters.py b/src/ankimaker/config/filters.py new file mode 100644 index 0000000..5207449 --- /dev/null +++ b/src/ankimaker/config/filters.py @@ -0,0 +1,16 @@ +from typing import List, Union + + +class FilterConfig: + column: Union[str, int] + values: Union[List[Union[int, str]], Union[int, str]] + + def __init__(self, column: str, values: Union[List[Union[int, str]], Union[int, str]]): + self.column = column + self.values = values + + def __str__(self): + return f'' + + def __repr__(self): + return self.__str__() diff --git a/src/ankimaker/config/load_config.py b/src/ankimaker/config/load_config.py index 892b4b4..770a713 100644 --- a/src/ankimaker/config/load_config.py +++ b/src/ankimaker/config/load_config.py @@ -1,5 +1,6 @@ -from pathlib import Path +import os import yaml +from pathlib import Path from .configuration import AnkimakerConfig @@ -10,7 +11,7 @@ def load_config_file(file_path: str): :param file_path: Path to yaml file with configuration :return: Dict config """ - file_path = Path(file_path) + file_path = Path(file_path if '~' not in file_path else os.path.expanduser(file_path)) assert file_path.exists() assert file_path.is_file() with open(file_path, 'r') as file: diff --git a/src/ankimaker/generator/__init__.py b/src/ankimaker/generator/__init__.py index dfafe21..a25c744 100644 --- a/src/ankimaker/generator/__init__.py +++ b/src/ankimaker/generator/__init__.py @@ -1,5 +1,5 @@ from . import ( deck, - # models, - # card ) +from .card import create_note +from .model import create_model diff --git a/src/ankimaker/generator/card.py b/src/ankimaker/generator/card.py new file mode 100644 index 0000000..5787336 --- /dev/null +++ b/src/ankimaker/generator/card.py @@ -0,0 +1,9 @@ +import genanki + + +def create_note(model, fields): + note = genanki.Note( + model=model, + fields=fields + ) + return note diff --git a/src/ankimaker/generator/model.py b/src/ankimaker/generator/model.py new file mode 100644 index 0000000..a4627da --- /dev/null +++ b/src/ankimaker/generator/model.py @@ -0,0 +1,20 @@ +import genanki + + +def create_model(): + my_model = genanki.Model( + 1607392319, + 'Simple Model', + fields=[ + {'name': 'Question'}, + {'name': 'Answer'}, + ], + templates=[ + { + 'name': 'Card 1', + 'qfmt': '
{{Question}}
', + 'afmt': '{{FrontSide}}
{{Answer}}
', + }, + ] + ) + return my_model diff --git a/src/ankimaker/generator/models.py b/src/ankimaker/generator/models.py new file mode 100644 index 0000000..0068b6a --- /dev/null +++ b/src/ankimaker/generator/models.py @@ -0,0 +1,17 @@ +import genanki as anki + +simple_flashcard = anki.Model( + 16073923194617823, + name='simple_flashcard', + fields=[ + {'name': 'word'}, + {'name': 'meaning'} + ], + templates=[ + { + 'name': 'geneticname', + 'qfmt': '{{word}}', + 'afmt': '{{FrontSide}}
{{meaning}}' + } + ] +) diff --git a/src/ankimaker/tasks/basic_csv_to_anki.py b/src/ankimaker/tasks/basic_csv_to_anki.py index 3183c25..8700669 100644 --- a/src/ankimaker/tasks/basic_csv_to_anki.py +++ b/src/ankimaker/tasks/basic_csv_to_anki.py @@ -1,59 +1,35 @@ import genanki import pandas as pd +from typing import List +from functools import reduce -from ankimaker.config import Config from ankimaker import generator, config +from ankimaker.config import Config, FilterConfig -def create_model(): - my_model = genanki.Model( - 1607392319, - 'Simple Model', - fields=[ - {'name': 'Question'}, - {'name': 'Answer'}, - ], - templates=[ - { - 'name': 'Card 1', - 'qfmt': '
{{Question}}
', - 'afmt': '{{FrontSide}}
{{Answer}}
', - }, - ] - ) - return my_model - - -def create_note(model, fields): - note = genanki.Note( - model=model, - fields=fields - ) - return note - - -def load_csv(path): +def load_csv(path: str) -> pd.DataFrame: df = pd.read_csv(path, header=Config.header, sep=Config.separators) df_columns_are_unnamed = all(map(lambda x: str(x).isnumeric(), df.columns)) if df_columns_are_unnamed: Config.answer_column = int(Config.answer_column) Config.question_column = int(Config.question_column) + df = apply_filters(df) return df -def add_df_to_deck(df: pd.DataFrame, deck: genanki.Deck): - model = create_model() +def add_df_to_deck(df: pd.DataFrame, deck: genanki.Deck) -> genanki.Deck: + model = generator.create_model() for entry in df.to_dict('records'): question = entry[Config.question_column] answer = entry[Config.answer_column] content_fields = (question, answer) - note = create_note(model, fields=content_fields) + note = generator.create_note(model, fields=content_fields) deck.add_note(note) return deck -def handle_config(config_file_path): +def handle_config(config_file_path: str): if config_file_path is None: Config.header = None Config.question_column = 0 @@ -62,6 +38,60 @@ def handle_config(config_file_path): config.load_config_file(config_file_path) +def apply_filters(df: pd.DataFrame) -> pd.DataFrame: + """ + Returns filtered dataframe removing any row that does not correspond to at least one + of the filter groups defined in Configuration. + :param df: Original dataframe. + :return: Filtered Dataframe. + """ + there_are_no_filter_to_apply = len(Config.filters) == 0 + if there_are_no_filter_to_apply: + return df + is_in_configured_filter_rules = load_filter_from_config(df) + df_filtered = df[is_in_configured_filter_rules] + return df_filtered + + +def load_filter_from_config(df: pd.DataFrame) -> pd.Series: + """ + Given a dataframe, returns a series indicating which rows should be kept according to loaded + Config [AnkimakerConfig]. The rows presented in any filter group should be kept. + :param df: Original dataframe. + :return pd.Series: Boolean Series to filter df. + """ + group_filters: List[pd.Series] = list() + for group in Config.filters: + if len(group) > 0: + group_filters.append( + create_group_filter(df, group) + ) + config_filter = reduce(lambda a, b: a | b, group_filters) + return config_filter + + +def create_group_filter(df: pd.DataFrame, group: List[FilterConfig]) -> pd.Series: + """ + Creates a boolean series indicating which rows are in the filters configuration defined + group to be used to filter the dataframe. + :param df: Input dataframe to be filtered. + :param group: Filter defined Group. + :return: Series of boolean indicating rows that are in the group. + """ + rule: FilterConfig + query: List[pd.Series] = list() + for rule in group: + __assert_rule_is_valid(df, rule) + is_in_rule = df[rule.column].apply(lambda x: x in rule.values) + query.append(is_in_rule) + is_in_group = reduce(lambda a, b: a & b, query) + return is_in_group + + +def __assert_rule_is_valid(df: pd.DataFrame, rule: FilterConfig): + assert rule.column in df.columns + + def basic_pandas_to_anki(csv_path, output_path, name, config_file_path): handle_config(config_file_path) df = load_csv(csv_path) diff --git a/src/ankimaker/tasks/config_tasks/create_config.py b/src/ankimaker/tasks/config_tasks/create_config.py index 96c9c07..32e06d1 100644 --- a/src/ankimaker/tasks/config_tasks/create_config.py +++ b/src/ankimaker/tasks/config_tasks/create_config.py @@ -1,3 +1,4 @@ +import os import yaml import click import pandas as pd @@ -23,6 +24,7 @@ __COMMAND_SAMPLE = """ankimaker csv \ --conf {output} """ + def create_config(input_file, output_path): new_config = Config() @@ -33,10 +35,20 @@ def create_config(input_file, output_path): input_file, read_option='header', header=new_config.header, sep=new_config.separators, option_type=int ) + + new_config.question_column = get_column('question') + new_config.answer_column = get_column('answer') + + save_file(new_config, output_path) finish_message = __SUCCESS_MESSAGE.format(command=make_sample_command(input_file, output_path)) click.echo(finish_message) +def get_column(name: str) -> str: + answer = click.prompt(f'Which is your {name} column?', type=str, confirmation_prompt=True) + return answer + + def handle_read_option(input_file, read_option, option_type: Type = str, **kargs): preview: str is_finished = False @@ -66,12 +78,14 @@ def load_preview(input_file, *args, **kargs): def save_file(config: Config, file_path): - f = open(file_path, 'w') - yaml.dump(config, f) + if '~' in file_path: + file_path = os.path.expanduser(file_path) + with open(file_path, 'w') as f: + yaml.dump(config, f) -def make_sample_command(inputf, output): +def make_sample_command(input_config, output): command = __COMMAND_SAMPLE.format( - input=inputf, output=output + input=input_config, output=output ) return command diff --git a/src/ankimaker/utils/files.py b/src/ankimaker/utils/files.py index cba0542..326a356 100644 --- a/src/ankimaker/utils/files.py +++ b/src/ankimaker/utils/files.py @@ -1,3 +1,3 @@ -def get_fyle_type(filename): +def get_fyle_type(filename: str) -> str: filetype = filename.split('.')[-1] if len(filename.split('.')) > 0 else None return filetype