"""Replace the placemarks of a KML document with rows read from a spreadsheet.

Reconstructed from the patch that introduced ``src/__main__.py`` (commit
deb228a, "Basic processing xlsx into a xml with fixed column names").
The same patch lists ``pandas`` and ``openpyxl`` in ``requirements.txt``.

The spreadsheet column names and the placemark style are fixed constants.
"""

import os
import zipfile
import numpy as np
import pandas as pd
from pathlib import Path
import xml.etree.ElementTree as xmltree
from xml.dom import minidom

KML_NAMESPACE = 'http://www.opengis.net/kml/2.2'


def _kml_tag(local_name):
    """Return *local_name* qualified with the KML namespace (ElementTree form)."""
    return '{' + KML_NAMESPACE + '}' + local_name


def _find_document(xml):
    """Return the ``Document`` child of *xml*, raising ValueError if absent."""
    document = xml.find(_kml_tag('Document'))
    if document is None:
        raise ValueError('KML root element has no <Document> child')
    return document


def load_kmz(kmz_file_path):
    """Parse ``doc.kml`` out of the KMZ archive at *kmz_file_path*.

    Returns the root ``Element`` of the parsed document, or ``None`` (after
    printing a diagnostic) when the archive cannot be opened, does not
    contain ``doc.kml``, or the entry is not well-formed XML.
    """
    file_to_extract = 'doc.kml'
    # Resolve up front so the diagnostic shows where a relative path pointed.
    resolved_path = Path(kmz_file_path).resolve()
    try:
        with zipfile.ZipFile(kmz_file_path) as archive:
            return xmltree.fromstring(archive.read(file_to_extract))
    except (OSError, zipfile.BadZipFile, KeyError, xmltree.ParseError) as error:
        # OSError: missing/unreadable file; KeyError: no doc.kml in the archive.
        print(f"Invalid file: {resolved_path} ({error})")
        return None


def load_xlsx(xlsx_file_path):
    """Read the spreadsheet at *xlsx_file_path* into a list of row dicts."""
    df = pd.read_excel(xlsx_file_path)
    return df.to_dict('records')


def remove_all_current_placemarkers(xml):
    """Remove every ``Placemark`` that is a direct child of the ``Document``.

    The tree is modified in place and also returned.

    Raises:
        ValueError: if *xml* has no ``Document`` child.
    """
    document = _find_document(xml)
    # findall() returns a list, so removing while looping over it is safe.
    for placemark in document.findall(_kml_tag('Placemark')):
        document.remove(placemark)
    return xml


def append_placemarks_to_xml(xml, placemarks):
    """Append each element of *placemarks* to the ``Document`` of *xml*.

    The tree is modified in place and also returned.

    Raises:
        ValueError: if *xml* has no ``Document`` child.
    """
    document = _find_document(xml)
    for placemark in placemarks:
        document.append(placemark)
    return xml


def map_dit(dict_list):
    """Convert spreadsheet rows into ``name``/``description``/``location`` dicts.

    ``name`` joins the 'Price' and 'Includes Bills' cells with a space.
    ``description`` joins the non-missing 'Url', 'Min Satay' and 'Notes'
    cells with newlines. ``location`` is the 'Location' cell passed through
    ``parse_location``.
    """
    name_columns = ['Price', 'Includes Bills']
    description_columns = ['Url', 'Min Satay', 'Notes']
    location_column = 'Location'

    items = []
    for item in dict_list:
        name = ' '.join(str(item[column]) for column in name_columns)
        # pd.isna() rather than ``is not np.nan``: a NaN float is not
        # guaranteed to be the np.nan singleton, so an identity check lets
        # the text "nan" leak into the description.
        description = '\n'.join(
            str(item[column])
            for column in description_columns
            if not pd.isna(item[column])
        )
        location = parse_location(item[location_column])
        items.append(dict(name=name, description=description, location=location))
    return items


def parse_location(text_location):
    """Turn ``"a, b"`` into the string ``"b,a,0"``.

    Spaces are removed, the first two comma-separated fields are swapped and
    a third field ``0`` is appended.
    NOTE(review): presumably the input is "latitude, longitude" and the
    output is KML's "longitude,latitude,altitude" order - confirm against
    the spreadsheet.
    """
    fields = text_location.replace(' ', '').split(',')
    return ','.join([fields[1], fields[0], '0'])


def map_dict_element_to_placemark_xml(element: dict):
    """Build a KML ``Placemark`` element from a dict produced by ``map_dit``.

    *element* must provide the keys ``name``, ``description`` and
    ``location``; the latter becomes the text of ``Point/coordinates``.
    """
    xml_element = xmltree.Element(_kml_tag('Placemark'))
    xmltree.SubElement(xml_element, _kml_tag('name')).text = element['name']
    xmltree.SubElement(xml_element, _kml_tag('description')).text = element['description']
    # Fixed style id; it is only a reference and is not defined here.
    xmltree.SubElement(xml_element, _kml_tag('styleUrl')).text = '#icon-1603-E65100'
    xml_point = xmltree.SubElement(xml_element, _kml_tag('Point'))
    xmltree.SubElement(xml_point, _kml_tag('coordinates')).text = element['location']
    return xml_element


def main(xlsx_path='data/housing.xlsx',
         kmz_path='data/Acoomodations.kmz',
         output_path='testoutput.kml'):
    """Write a KML file whose placemarks come from the spreadsheet rows.

    The KML document is taken from *kmz_path*, its existing top-level
    placemarks are dropped, one placemark per row of *xlsx_path* is added,
    and the pretty-printed result is written to *output_path*.

    Raises:
        SystemExit: if no KML document could be loaded from *kmz_path*.
    """
    dict_elements = map_dit(load_xlsx(xlsx_path))
    placemarks = [map_dict_element_to_placemark_xml(dict_element)
                  for dict_element in dict_elements]

    kmz_xml = load_kmz(kmz_path)
    if kmz_xml is None:
        raise SystemExit(f'Could not read a KML document from {kmz_path}')
    kmz_xml = remove_all_current_placemarkers(kmz_xml)
    kmz_xml = append_placemarks_to_xml(kmz_xml, placemarks)

    # Serialise with KML as the default namespace (no "ns0:" prefixes).
    xmltree.register_namespace("", KML_NAMESPACE)
    rough_string = xmltree.tostring(kmz_xml, 'utf-8')
    content = minidom.parseString(rough_string).toprettyxml(indent="\t")

    # The XML declaration names no encoding, which means UTF-8; write the
    # file accordingly instead of relying on the platform default.
    with open(output_path, "w", encoding="utf-8") as file:
        file.write(content)


if __name__ == '__main__':
    main()