From 19fe61b656ba3c88cbb166aa7ff2affb5353d5b2 Mon Sep 17 00:00:00 2001 From: gabriel becker Date: Thu, 21 Jul 2022 19:07:04 -0300 Subject: [PATCH] Adapt data folder to accept data engineering convention https://towardsdatascience.com/the-importance-of-layered-thinking-in-data-engineering-a09f685edc71 --- cookiecutter.json | 2 +- {{ cookiecutter.repo_name }}/Makefile | 20 +------------------ .../data/{external => 01_raw}/.gitkeep | 0 .../data/{interim => 02_refined}/.gitkeep | 0 .../data/{processed => 03_trusted}/.gitkeep | 0 .../data/{raw => 04_feature}/.gitkeep | 0 .../data/05_model_input/.gitkeep | 0 .../data/06_models/.gitkeep | 0 .../data/07_model_output/.gitkeep | 0 .../data/08_reporting/.gitkeep | 0 .../docs/commands.rst | 5 ----- .../docs/getting-started.rst | 3 --- 12 files changed, 2 insertions(+), 28 deletions(-) rename {{ cookiecutter.repo_name }}/data/{external => 01_raw}/.gitkeep (100%) rename {{ cookiecutter.repo_name }}/data/{interim => 02_refined}/.gitkeep (100%) rename {{ cookiecutter.repo_name }}/data/{processed => 03_trusted}/.gitkeep (100%) rename {{ cookiecutter.repo_name }}/data/{raw => 04_feature}/.gitkeep (100%) create mode 100644 {{ cookiecutter.repo_name }}/data/05_model_input/.gitkeep create mode 100644 {{ cookiecutter.repo_name }}/data/06_models/.gitkeep create mode 100644 {{ cookiecutter.repo_name }}/data/07_model_output/.gitkeep create mode 100644 {{ cookiecutter.repo_name }}/data/08_reporting/.gitkeep diff --git a/cookiecutter.json b/cookiecutter.json index b5e7121..c717c05 100644 --- a/cookiecutter.json +++ b/cookiecutter.json @@ -1,7 +1,7 @@ { "project_name": "project_name", "repo_name": "{{ cookiecutter.project_name.lower().replace(' ', '_') }}", - "package_name": "{{ cookiecutter.package_name.lower().replace(' ', '_') }}", + "package_name": "package_name", "author_name": "Your name (or your organization/company/team)", "description": "A short description of the project.", "open_source_license": ["MIT", "BSD-3-Clause", "No 
license file"], diff --git a/{{ cookiecutter.repo_name }}/Makefile b/{{ cookiecutter.repo_name }}/Makefile index cf9406b..310ff6a 100644 --- a/{{ cookiecutter.repo_name }}/Makefile +++ b/{{ cookiecutter.repo_name }}/Makefile @@ -1,12 +1,10 @@ -.PHONY: clean data lint requirements sync_data_to_s3 sync_data_from_s3 +.PHONY: clean data lint requirements ################################################################################# # GLOBALS # ################################################################################# PROJECT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) -BUCKET = {{ cookiecutter.s3_bucket }} -PROFILE = {{ cookiecutter.aws_profile }} PROJECT_NAME = {{ cookiecutter.repo_name }} PYTHON_INTERPRETER = {{ cookiecutter.python_interpreter }} @@ -38,22 +36,6 @@ clean: lint: flake8 src -## Upload Data to S3 -sync_data_to_s3: -ifeq (default,$(PROFILE)) - aws s3 sync data/ s3://$(BUCKET)/data/ -else - aws s3 sync data/ s3://$(BUCKET)/data/ --profile $(PROFILE) -endif - -## Download Data from S3 -sync_data_from_s3: -ifeq (default,$(PROFILE)) - aws s3 sync s3://$(BUCKET)/data/ data/ -else - aws s3 sync s3://$(BUCKET)/data/ data/ --profile $(PROFILE) -endif - ## Set up python interpreter environment create_environment: ifeq (True,$(HAS_CONDA)) diff --git a/{{ cookiecutter.repo_name }}/data/external/.gitkeep b/{{ cookiecutter.repo_name }}/data/01_raw/.gitkeep similarity index 100% rename from {{ cookiecutter.repo_name }}/data/external/.gitkeep rename to {{ cookiecutter.repo_name }}/data/01_raw/.gitkeep diff --git a/{{ cookiecutter.repo_name }}/data/interim/.gitkeep b/{{ cookiecutter.repo_name }}/data/02_refined/.gitkeep similarity index 100% rename from {{ cookiecutter.repo_name }}/data/interim/.gitkeep rename to {{ cookiecutter.repo_name }}/data/02_refined/.gitkeep diff --git a/{{ cookiecutter.repo_name }}/data/processed/.gitkeep b/{{ cookiecutter.repo_name }}/data/03_trusted/.gitkeep similarity index 100% rename from {{ 
cookiecutter.repo_name }}/data/processed/.gitkeep rename to {{ cookiecutter.repo_name }}/data/03_trusted/.gitkeep diff --git a/{{ cookiecutter.repo_name }}/data/raw/.gitkeep b/{{ cookiecutter.repo_name }}/data/04_feature/.gitkeep similarity index 100% rename from {{ cookiecutter.repo_name }}/data/raw/.gitkeep rename to {{ cookiecutter.repo_name }}/data/04_feature/.gitkeep diff --git a/{{ cookiecutter.repo_name }}/data/05_model_input/.gitkeep b/{{ cookiecutter.repo_name }}/data/05_model_input/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/{{ cookiecutter.repo_name }}/data/06_models/.gitkeep b/{{ cookiecutter.repo_name }}/data/06_models/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/{{ cookiecutter.repo_name }}/data/07_model_output/.gitkeep b/{{ cookiecutter.repo_name }}/data/07_model_output/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/{{ cookiecutter.repo_name }}/data/08_reporting/.gitkeep b/{{ cookiecutter.repo_name }}/data/08_reporting/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/{{ cookiecutter.repo_name }}/docs/commands.rst b/{{ cookiecutter.repo_name }}/docs/commands.rst index 56e9e4a..f1a4ea7 100644 --- a/{{ cookiecutter.repo_name }}/docs/commands.rst +++ b/{{ cookiecutter.repo_name }}/docs/commands.rst @@ -3,8 +3,3 @@ Commands The Makefile contains the central entry points for common tasks related to this project. -Syncing data to S3 -^^^^^^^^^^^^^^^^^^ - -* `make sync_data_to_s3` will use `aws s3 sync` to recursively sync files in `data/` up to `s3://{{ cookiecutter.s3_bucket }}/data/`. -* `make sync_data_from_s3` will use `aws s3 sync` to recursively sync files from `s3://{{ cookiecutter.s3_bucket }}/data/` to `data/`. 
diff --git a/{{ cookiecutter.repo_name }}/docs/getting-started.rst b/{{ cookiecutter.repo_name }}/docs/getting-started.rst index b4f71c3..90be568 100644 --- a/{{ cookiecutter.repo_name }}/docs/getting-started.rst +++ b/{{ cookiecutter.repo_name }}/docs/getting-started.rst @@ -1,6 +1,3 @@ Getting started =============== -This is where you describe how to get set up on a clean install, including the -commands necessary to get the raw data (using the `sync_data_from_s3` command, -for example), and then how to make the cleaned, final data sets.