Create filters in the configuration and implement them.

2022-12-08 18:43:24 +11:00
11 changed files with 50 additions and 149 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -157,7 +157,4 @@ cython_debug/
 #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
-.idea/
-
-# Project Specific
-scripts/
+#.idea/
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,4 @@
 click
 genanki
 pandas
-pyyaml
-bullet
+pyyaml
--- a/setup.py
+++ b/setup.py
@@ -27,7 +27,6 @@ setup(
        "genanki",
        "pandas",
        "pyyaml",
-        "bullet"
    ],
    long_description_content_type='text/markdown',
 )
--- a/src/ankimaker/commands/__init__.py
+++ b/src/ankimaker/commands/__init__.py
@@ -1,3 +1,10 @@
-from .base_click import cli
-from .from_csv import generate_anki
-from .make_config import make_csv_config
+import click
+
+
+@click.group("cli")
+def cli():
+    pass
+
+
+from ..commands.from_csv import generate_anki
+from ..commands.make_config import make_csv_config
--- a/src/ankimaker/commands/base_click.py
+++ b/src/ankimaker/commands/base_click.py
@@ -1,6 +0,0 @@
-import click
-
-
-@click.group("cli")
-def cli():
-    pass
--- a/src/ankimaker/commands/from_csv.py
+++ b/src/ankimaker/commands/from_csv.py
@@ -1,6 +1,5 @@
-import re
 import click
-
+import re
 from ankimaker.commands import cli
 from ankimaker.tasks import basic_pandas_to_anki

--- a/src/ankimaker/config/configuration.py
+++ b/src/ankimaker/config/configuration.py
@@ -14,15 +14,12 @@ class AnkimakerConfig(yaml.YAMLObject):
    separators = ','
    filters: List[List[FilterConfig]] = list()

-    def __init__(
-            self, separators=',', header=None, answer_column=None, question_column=None,
-            filters=tuple(), *args, **karhs
-    ):
-        self.answer_column = answer_column
-        self.question_column = question_column
-        self.header = header
-        self.separators = separators
-        self.filters = _conditionally_create_new_filters(filters)
+    def __init__(self, header=None, answer_column=None, question_column=None, filters=tuple()):
+        AnkimakerConfig.answer_column = answer_column
+        AnkimakerConfig.question_column = question_column
+        AnkimakerConfig.header = header
+        AnkimakerConfig.AnkimakerConfig = AnkimakerConfig
+        AnkimakerConfig.filters = list(map(lambda x: FilterConfig, filters))

    @staticmethod
    def loader(configuration_content):
@@ -34,19 +31,8 @@ class AnkimakerConfig(yaml.YAMLObject):
        AnkimakerConfig.question_column = content.question_column
        AnkimakerConfig.answer_column = content.answer_column
        AnkimakerConfig.separators = content.separators
-        AnkimakerConfig.filters = _conditionally_create_new_filters(content.filters)
+        AnkimakerConfig.filters = [
+            [FilterConfig(**x) for x in or_filter]
+            for or_filter in content.filters
+        ]

-
-def _conditionally_create_new_filters(filters):
-    conf_has_filters = len(filters) > 0
-    if conf_has_filters:
-        should_cast_filter = not isinstance(filters[0][0], FilterConfig)
-        if should_cast_filter:
-            new_filters = [
-                [FilterConfig(**x) for x in or_filter]
-                for or_filter in filters
-            ]
-        else:
-            new_filters = filters
-        return new_filters
-    return list()
--- a/src/ankimaker/config/filters.py
+++ b/src/ankimaker/config/filters.py
@@ -1,10 +1,7 @@
-import yaml
-
 from typing import List, Union


-class FilterConfig(yaml.YAMLObject):
-    yaml_tag = '!fitlerconfig'
+class FilterConfig:
    column: Union[str, int]
    values: Union[List[Union[int, str]], Union[int, str]]

@@ -13,7 +10,7 @@ class FilterConfig(yaml.YAMLObject):
        self.values = values

    def __str__(self):
-        return f'<F({self.column}:{self.values})>'
+        return f'<ankimaker.config.filters.FilterConfig {self.column}: {self.values} >'

    def __repr__(self):
        return self.__str__()
--- a/src/ankimaker/tasks/basic_csv_to_anki.py
+++ b/src/ankimaker/tasks/basic_csv_to_anki.py
@@ -3,11 +3,11 @@ import pandas as pd
 from typing import List
 from functools import reduce

-from ankimaker import generator, config
 from ankimaker.config import Config, FilterConfig
+from ankimaker import generator, config


-def load_csv(path: str) -> pd.DataFrame:
+def load_csv(path):
    df = pd.read_csv(path, header=Config.header, sep=Config.separators)
    df_columns_are_unnamed = all(map(lambda x: str(x).isnumeric(), df.columns))
    if df_columns_are_unnamed:
@@ -17,7 +17,7 @@ def load_csv(path: str) -> pd.DataFrame:
    return df


-def add_df_to_deck(df: pd.DataFrame, deck: genanki.Deck) -> genanki.Deck:
+def add_df_to_deck(df: pd.DataFrame, deck: genanki.Deck):
    model = generator.create_model()

    for entry in df.to_dict('records'):
@@ -29,7 +29,7 @@ def add_df_to_deck(df: pd.DataFrame, deck: genanki.Deck) -> genanki.Deck:
    return deck


-def handle_config(config_file_path: str):
+def handle_config(config_file_path):
    if config_file_path is None:
        Config.header = None
        Config.question_column = 0
@@ -40,10 +40,9 @@ def handle_config(config_file_path: str):

 def apply_filters(df: pd.DataFrame) -> pd.DataFrame:
    """
-    Returns filtered dataframe removing any row that does not correspond to at least one
-    of the filter  groups defined in Configuration.
-    :param df: Original dataframe.
-    :return: Filtered Dataframe.
+
+    :param df:
+    :return:
    """
    there_are_no_filter_to_apply = len(Config.filters) == 0
    if there_are_no_filter_to_apply:
@@ -54,12 +53,7 @@ def apply_filters(df: pd.DataFrame) -> pd.DataFrame:


 def load_filter_from_config(df: pd.DataFrame) -> pd.Series:
-    """
-    Given a dataframe, returns a series indicating which rows should be kept according to loaded
-    Config [AnkimakerConfig]. The rows presented in any filter group should be kept.
-    :param df: Original dataframe.
-    :return pd.Series: Boolean Series to filter df.
-    """
+
    group_filters: List[pd.Series] = list()
    for group in Config.filters:
        if len(group) > 0:
@@ -72,23 +66,22 @@ def load_filter_from_config(df: pd.DataFrame) -> pd.Series:

 def create_group_filter(df: pd.DataFrame, group: List[FilterConfig]) -> pd.Series:
    """
-    Creates a boolean series indicating which rows are in the filters configuration defined
-    group to be used to filter the dataframe.
-    :param df: Input dataframe to be filtered.
-    :param group: Filter defined Group.
-    :return: Series of boolean indicating rows that are in the group.
+
+    :param df:
+    :param group:
+    :return:
    """
    rule: FilterConfig
    query: List[pd.Series] = list()
    for rule in group:
-        __assert_rule_is_valid(df, rule)
+        assert_rule_is_valid(df, rule)
        is_in_rule = df[rule.column].apply(lambda x: x in rule.values)
        query.append(is_in_rule)
    is_in_group = reduce(lambda a, b: a & b, query)
    return is_in_group


-def __assert_rule_is_valid(df: pd.DataFrame, rule: FilterConfig):
+def assert_rule_is_valid(df: pd.DataFrame, rule: FilterConfig):
    assert rule.column in df.columns


--- a/src/ankimaker/tasks/config_tasks/create_config.py
+++ b/src/ankimaker/tasks/config_tasks/create_config.py
@@ -1,12 +1,10 @@
 import os
-
 import yaml
 import click
 import pandas as pd
-from typing import Type, List
-from bullet import Bullet, Input, YesNo
+from typing import Type

-from ankimaker.config import Config, FilterConfig
+from ankimaker.config import Config


 __CONFIRMATION_QUESTION = """
@@ -27,93 +25,25 @@ __COMMAND_SAMPLE = """ankimaker csv \
 """


-__ADD_FILTER_QUESTION = """Do you want do add a filter to the configuration?"""
-
-
 def create_config(input_file, output_path):
+    new_config = Config()

-    separators = handle_read_option(
-        input_file, read_option='sep', sep=','
+    new_config.separators = handle_read_option(
+        input_file, read_option='sep', sep=new_config.separators
    )
-    header = handle_read_option(
-        input_file, read_option='header', header=None,
-        sep=separators, option_type=int
+    new_config.header = handle_read_option(
+        input_file, read_option='header', header=new_config.header,
+        sep=new_config.separators, option_type=int
    )

-    question_column = get_column('question')
-    answer_column = get_column('answer')
+    new_config.question_column = get_column('question')
+    new_config.answer_column = get_column('answer')

-    filters = process_filters(input_file, header, separators)
-
-    new_config = Config(
-        separators=separators,
-        header=header,
-        question_column=question_column,
-        answer_column=answer_column,
-        filters=filters
-    )
    save_file(new_config, output_path)
-
    finish_message = __SUCCESS_MESSAGE.format(command=make_sample_command(input_file, output_path))
-    click.clear()
    click.echo(finish_message)


-def process_filters(input_file, header, separators):
-    df = pd.read_csv(input_file, header=header, sep=separators)
-    filters = add_filters_to_config(df)
-    return filters
-
-
-def __inline_yes_or_no_question(question):
-    answer = YesNo(prompt=question, default='n').launch()
-    return answer
-
-
-def add_filters_to_config(df: pd.DataFrame) -> List[List[FilterConfig]]:
-    config = Config()
-    should_add_filter = __inline_yes_or_no_question(__ADD_FILTER_QUESTION)
-    while should_add_filter:
-        config = add_filter_to_or_create_filter_group(df, config)
-        should_add_filter = __inline_yes_or_no_question(__ADD_FILTER_QUESTION)
-    return config.filters
-
-
-def add_filter_to_or_create_filter_group(df: pd.DataFrame, config: Config) -> Config:
-    config_has_filters = len(config.filters) > 0
-    chosen_group = -1
-    if config_has_filters:
-        filter_options = [f'({"|".join(map(str, group)):.45s})' for group in config.filters]
-        filter_options = [f'Group{i+1}{s}' for i, s in enumerate(filter_options)]
-        cli = Bullet(
-            prompt="Select group: ",
-            choices=["Create new", *filter_options],
-            return_index=True,
-        )
-        chosen_group = cli.launch()[1] - 1
-    new_filter = create_filter_config(df)
-    if chosen_group < 0:
-        config.filters.append([new_filter])
-    else:
-        config.filters[chosen_group].append(new_filter)
-    return config
-
-
-def create_filter_config(df: pd.DataFrame) -> FilterConfig:
-    options = list(df.columns)
-    cli = Bullet(
-        prompt="Select a columns to filter: ",
-        choices=list(map(str, options)),
-        return_index=True
-    )
-    chosen = cli.launch()[1]
-    filter_column = options[chosen]
-    columns_values = df[filter_column].unique()
-    values = Input(f'Which values fo filter out? values[{columns_values}]: ').launch()
-    new_filter = FilterConfig(column=filter_column, values=values)
-    return new_filter
-
-
 def get_column(name: str) -> str:
    answer = click.prompt(f'Which is your {name} column?', type=str, confirmation_prompt=True)
    return answer
--- a/src/ankimaker/utils/files.py
+++ b/src/ankimaker/utils/files.py
@@ -1,3 +1,3 @@
-def get_fyle_type(filename: str) -> str:
+def get_fyle_type(filename):
    filetype = filename.split('.')[-1] if len(filename.split('.')) > 0 else None
    return filetype