Compare commits

..

1 Commits

Author SHA1 Message Date
gabriel becker
791caa3624 Create filters in configuration and implement it. 2022-12-08 18:43:24 +11:00
11 changed files with 50 additions and 149 deletions

5
.gitignore vendored
View File

@ -157,7 +157,4 @@ cython_debug/
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear # and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder. # option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/ #.idea/
# Project Specific
scripts/

View File

@ -2,4 +2,3 @@ click
genanki genanki
pandas pandas
pyyaml pyyaml
bullet

View File

@ -27,7 +27,6 @@ setup(
"genanki", "genanki",
"pandas", "pandas",
"pyyaml", "pyyaml",
"bullet"
], ],
long_description_content_type='text/markdown', long_description_content_type='text/markdown',
) )

View File

@ -1,3 +1,10 @@
from .base_click import cli import click
from .from_csv import generate_anki
from .make_config import make_csv_config
@click.group("cli")
def cli():
pass
from ..commands.from_csv import generate_anki
from ..commands.make_config import make_csv_config

View File

@ -1,6 +0,0 @@
import click
@click.group("cli")
def cli():
pass

View File

@ -1,6 +1,5 @@
import re
import click import click
import re
from ankimaker.commands import cli from ankimaker.commands import cli
from ankimaker.tasks import basic_pandas_to_anki from ankimaker.tasks import basic_pandas_to_anki

View File

@ -14,15 +14,12 @@ class AnkimakerConfig(yaml.YAMLObject):
separators = ',' separators = ','
filters: List[List[FilterConfig]] = list() filters: List[List[FilterConfig]] = list()
def __init__( def __init__(self, header=None, answer_column=None, question_column=None, filters=tuple()):
self, separators=',', header=None, answer_column=None, question_column=None, AnkimakerConfig.answer_column = answer_column
filters=tuple(), *args, **karhs AnkimakerConfig.question_column = question_column
): AnkimakerConfig.header = header
self.answer_column = answer_column AnkimakerConfig.AnkimakerConfig = AnkimakerConfig
self.question_column = question_column AnkimakerConfig.filters = list(map(lambda x: FilterConfig, filters))
self.header = header
self.separators = separators
self.filters = _conditionally_create_new_filters(filters)
@staticmethod @staticmethod
def loader(configuration_content): def loader(configuration_content):
@ -34,19 +31,8 @@ class AnkimakerConfig(yaml.YAMLObject):
AnkimakerConfig.question_column = content.question_column AnkimakerConfig.question_column = content.question_column
AnkimakerConfig.answer_column = content.answer_column AnkimakerConfig.answer_column = content.answer_column
AnkimakerConfig.separators = content.separators AnkimakerConfig.separators = content.separators
AnkimakerConfig.filters = _conditionally_create_new_filters(content.filters) AnkimakerConfig.filters = [
def _conditionally_create_new_filters(filters):
conf_has_filters = len(filters) > 0
if conf_has_filters:
should_cast_filter = not isinstance(filters[0][0], FilterConfig)
if should_cast_filter:
new_filters = [
[FilterConfig(**x) for x in or_filter] [FilterConfig(**x) for x in or_filter]
for or_filter in filters for or_filter in content.filters
] ]
else:
new_filters = filters
return new_filters
return list()

View File

@ -1,10 +1,7 @@
import yaml
from typing import List, Union from typing import List, Union
class FilterConfig(yaml.YAMLObject): class FilterConfig:
yaml_tag = '!fitlerconfig'
column: Union[str, int] column: Union[str, int]
values: Union[List[Union[int, str]], Union[int, str]] values: Union[List[Union[int, str]], Union[int, str]]
@ -13,7 +10,7 @@ class FilterConfig(yaml.YAMLObject):
self.values = values self.values = values
def __str__(self): def __str__(self):
return f'<F({self.column}:{self.values})>' return f'<ankimaker.config.filters.FilterConfig {self.column}: {self.values} >'
def __repr__(self): def __repr__(self):
return self.__str__() return self.__str__()

View File

@ -3,11 +3,11 @@ import pandas as pd
from typing import List from typing import List
from functools import reduce from functools import reduce
from ankimaker import generator, config
from ankimaker.config import Config, FilterConfig from ankimaker.config import Config, FilterConfig
from ankimaker import generator, config
def load_csv(path: str) -> pd.DataFrame: def load_csv(path):
df = pd.read_csv(path, header=Config.header, sep=Config.separators) df = pd.read_csv(path, header=Config.header, sep=Config.separators)
df_columns_are_unnamed = all(map(lambda x: str(x).isnumeric(), df.columns)) df_columns_are_unnamed = all(map(lambda x: str(x).isnumeric(), df.columns))
if df_columns_are_unnamed: if df_columns_are_unnamed:
@ -17,7 +17,7 @@ def load_csv(path: str) -> pd.DataFrame:
return df return df
def add_df_to_deck(df: pd.DataFrame, deck: genanki.Deck) -> genanki.Deck: def add_df_to_deck(df: pd.DataFrame, deck: genanki.Deck):
model = generator.create_model() model = generator.create_model()
for entry in df.to_dict('records'): for entry in df.to_dict('records'):
@ -29,7 +29,7 @@ def add_df_to_deck(df: pd.DataFrame, deck: genanki.Deck) -> genanki.Deck:
return deck return deck
def handle_config(config_file_path: str): def handle_config(config_file_path):
if config_file_path is None: if config_file_path is None:
Config.header = None Config.header = None
Config.question_column = 0 Config.question_column = 0
@ -40,10 +40,9 @@ def handle_config(config_file_path: str):
def apply_filters(df: pd.DataFrame) -> pd.DataFrame: def apply_filters(df: pd.DataFrame) -> pd.DataFrame:
""" """
Returns filtered dataframe removing any row that does not correspond to at least one
of the filter groups defined in Configuration. :param df:
:param df: Original dataframe. :return:
:return: Filtered Dataframe.
""" """
there_are_no_filter_to_apply = len(Config.filters) == 0 there_are_no_filter_to_apply = len(Config.filters) == 0
if there_are_no_filter_to_apply: if there_are_no_filter_to_apply:
@ -54,12 +53,7 @@ def apply_filters(df: pd.DataFrame) -> pd.DataFrame:
def load_filter_from_config(df: pd.DataFrame) -> pd.Series: def load_filter_from_config(df: pd.DataFrame) -> pd.Series:
"""
Given a dataframe, returns a series indicating which rows should be kept according to loaded
Config [AnkimakerConfig]. The rows presented in any filter group should be kept.
:param df: Original dataframe.
:return pd.Series: Boolean Series to filter df.
"""
group_filters: List[pd.Series] = list() group_filters: List[pd.Series] = list()
for group in Config.filters: for group in Config.filters:
if len(group) > 0: if len(group) > 0:
@ -72,23 +66,22 @@ def load_filter_from_config(df: pd.DataFrame) -> pd.Series:
def create_group_filter(df: pd.DataFrame, group: List[FilterConfig]) -> pd.Series: def create_group_filter(df: pd.DataFrame, group: List[FilterConfig]) -> pd.Series:
""" """
Creates a boolean series indicating which rows are in the filters configuration defined
group to be used to filter the dataframe. :param df:
:param df: Input dataframe to be filtered. :param group:
:param group: Filter defined Group. :return:
:return: Series of boolean indicating rows that are in the group.
""" """
rule: FilterConfig rule: FilterConfig
query: List[pd.Series] = list() query: List[pd.Series] = list()
for rule in group: for rule in group:
__assert_rule_is_valid(df, rule) assert_rule_is_valid(df, rule)
is_in_rule = df[rule.column].apply(lambda x: x in rule.values) is_in_rule = df[rule.column].apply(lambda x: x in rule.values)
query.append(is_in_rule) query.append(is_in_rule)
is_in_group = reduce(lambda a, b: a & b, query) is_in_group = reduce(lambda a, b: a & b, query)
return is_in_group return is_in_group
def __assert_rule_is_valid(df: pd.DataFrame, rule: FilterConfig): def assert_rule_is_valid(df: pd.DataFrame, rule: FilterConfig):
assert rule.column in df.columns assert rule.column in df.columns

View File

@ -1,12 +1,10 @@
import os import os
import yaml import yaml
import click import click
import pandas as pd import pandas as pd
from typing import Type, List from typing import Type
from bullet import Bullet, Input, YesNo
from ankimaker.config import Config, FilterConfig from ankimaker.config import Config
__CONFIRMATION_QUESTION = """ __CONFIRMATION_QUESTION = """
@ -27,93 +25,25 @@ __COMMAND_SAMPLE = """ankimaker csv \
""" """
__ADD_FILTER_QUESTION = """Do you want do add a filter to the configuration?"""
def create_config(input_file, output_path): def create_config(input_file, output_path):
new_config = Config()
separators = handle_read_option( new_config.separators = handle_read_option(
input_file, read_option='sep', sep=',' input_file, read_option='sep', sep=new_config.separators
) )
header = handle_read_option( new_config.header = handle_read_option(
input_file, read_option='header', header=None, input_file, read_option='header', header=new_config.header,
sep=separators, option_type=int sep=new_config.separators, option_type=int
) )
question_column = get_column('question') new_config.question_column = get_column('question')
answer_column = get_column('answer') new_config.answer_column = get_column('answer')
filters = process_filters(input_file, header, separators)
new_config = Config(
separators=separators,
header=header,
question_column=question_column,
answer_column=answer_column,
filters=filters
)
save_file(new_config, output_path) save_file(new_config, output_path)
finish_message = __SUCCESS_MESSAGE.format(command=make_sample_command(input_file, output_path)) finish_message = __SUCCESS_MESSAGE.format(command=make_sample_command(input_file, output_path))
click.clear()
click.echo(finish_message) click.echo(finish_message)
def process_filters(input_file, header, separators):
df = pd.read_csv(input_file, header=header, sep=separators)
filters = add_filters_to_config(df)
return filters
def __inline_yes_or_no_question(question):
answer = YesNo(prompt=question, default='n').launch()
return answer
def add_filters_to_config(df: pd.DataFrame) -> List[List[FilterConfig]]:
config = Config()
should_add_filter = __inline_yes_or_no_question(__ADD_FILTER_QUESTION)
while should_add_filter:
config = add_filter_to_or_create_filter_group(df, config)
should_add_filter = __inline_yes_or_no_question(__ADD_FILTER_QUESTION)
return config.filters
def add_filter_to_or_create_filter_group(df: pd.DataFrame, config: Config) -> Config:
config_has_filters = len(config.filters) > 0
chosen_group = -1
if config_has_filters:
filter_options = [f'({"|".join(map(str, group)):.45s})' for group in config.filters]
filter_options = [f'Group{i+1}{s}' for i, s in enumerate(filter_options)]
cli = Bullet(
prompt="Select group: ",
choices=["Create new", *filter_options],
return_index=True,
)
chosen_group = cli.launch()[1] - 1
new_filter = create_filter_config(df)
if chosen_group < 0:
config.filters.append([new_filter])
else:
config.filters[chosen_group].append(new_filter)
return config
def create_filter_config(df: pd.DataFrame) -> FilterConfig:
options = list(df.columns)
cli = Bullet(
prompt="Select a columns to filter: ",
choices=list(map(str, options)),
return_index=True
)
chosen = cli.launch()[1]
filter_column = options[chosen]
columns_values = df[filter_column].unique()
values = Input(f'Which values fo filter out? values[{columns_values}]: ').launch()
new_filter = FilterConfig(column=filter_column, values=values)
return new_filter
def get_column(name: str) -> str: def get_column(name: str) -> str:
answer = click.prompt(f'Which is your {name} column?', type=str, confirmation_prompt=True) answer = click.prompt(f'Which is your {name} column?', type=str, confirmation_prompt=True)
return answer return answer

View File

@ -1,3 +1,3 @@
def get_fyle_type(filename: str) -> str: def get_fyle_type(filename):
filetype = filename.split('.')[-1] if len(filename.split('.')) > 0 else None filetype = filename.split('.')[-1] if len(filename.split('.')) > 0 else None
return filetype return filetype