Browse Source

Basic processing of xlsx into an xml with fixed column names and many suppositions about file writing.

main
gabriel becker 2 years ago
commit
deb228a91d
  1. 2
      requirements.txt
  2. 0
      setup.py
  3. 0
      src/__init__.py
  4. 97
      src/__main__.py

2
requirements.txt

@ -0,0 +1,2 @@
pandas
openpyxl

0
src/__init__.py

97
src/__main__.py

@ -0,0 +1,97 @@
import os
import zipfile
import numpy as np
import pandas as pd
from pathlib import Path
import xml.etree.ElementTree as xmltree
from xml.dom import minidom
def load_kmz(kmz_file_path):
    """Read ``doc.kml`` out of a KMZ archive and parse it as XML.

    The resolved path and whether it exists are printed first as
    diagnostic output.

    Args:
        kmz_file_path: Path of the ``.kmz`` (zip) archive to open.

    Returns:
        The root ``xml.etree.ElementTree.Element`` parsed from the
        ``doc.kml`` member, or ``None`` when the archive could not be
        opened, read, or parsed (``"Invalid file"`` is printed in that case).
    """
    resolved_path = Path(kmz_file_path).resolve()
    print(resolved_path)
    print(resolved_path.exists())
    file_to_extract = 'doc.kml'
    try:
        with zipfile.ZipFile(kmz_file_path) as z:
            return xmltree.fromstring(z.read(file_to_extract))
    except Exception:
        # Deliberately best-effort: any failure to open, read or parse the
        # archive is reported and mapped to None. Unlike a bare ``except:``
        # this lets KeyboardInterrupt and SystemExit propagate.
        print("Invalid file")
        return None
def load_xlsx(xlsx_file_path):
    """Load a spreadsheet and return its rows as a list of dictionaries.

    The file is read with ``pandas.read_excel``; every row of the resulting
    frame becomes one ``{column name: cell value}`` dictionary.
    """
    sheet = pd.read_excel(xlsx_file_path)
    rows = sheet.to_dict(orient='records')
    return rows
def remove_all_current_placemarkers(xml):
    """Strip every ``Placemark`` that is a direct child of ``Document``.

    The tree is modified in place; placemarks nested deeper (for example
    inside another element under ``Document``) are left untouched.

    Returns:
        The same ``xml`` element that was passed in.
    """
    document = xml.find('{http://www.opengis.net/kml/2.2}Document')
    # findall() hands back a list, so removing while looping over it is safe.
    existing = document.findall('{http://www.opengis.net/kml/2.2}Placemark')
    for placemark in existing:
        document.remove(placemark)
    return xml
def append_placemarks_to_xml(xml, placemarks):
    """Add the given placemark elements to the end of ``Document``.

    The elements are appended, in order, as children of the ``Document``
    element found under ``xml``; the tree is modified in place.

    Returns:
        The same ``xml`` element that was passed in.
    """
    document = xml.find('{http://www.opengis.net/kml/2.2}Document')
    document.extend(placemarks)
    return xml
def map_dit(dict_list):
    """Convert spreadsheet rows into name/description/location records.

    Args:
        dict_list: Iterable of row dictionaries. Each row must provide the
            keys ``'Price'``, ``'Includes Bills'``, ``'Url'``,
            ``'Min Satay'``, ``'Notes'`` and ``'Location'``.

    Returns:
        A list with one dictionary per row, holding:
          * ``name`` - the name columns joined with a single space,
          * ``description`` - the non-missing description columns joined
            with newlines,
          * ``location`` - the ``'Location'`` cell run through
            ``parse_location``.

    Raises:
        KeyError: If a row lacks one of the expected columns.
    """
    name_columns = ['Price', 'Includes Bills']
    description_columns = ['Url', 'Min Satay', 'Notes']
    location_column = 'Location'
    items = []
    for item in dict_list:
        name = ' '.join(str(item[column]) for column in name_columns)
        # pd.isna() instead of ``is not np.nan``: an identity test only
        # matches the np.nan singleton, so other NaN float objects (and
        # None/NaT) would leak into the description as literal text.
        description = '\n'.join(
            str(item[column])
            for column in description_columns
            if not pd.isna(item[column])
        )
        location = parse_location(item[location_column])
        items.append(dict(name=name, description=description, location=location))
    return items
def parse_location(text_location):
    """Reorder a comma-separated coordinate string.

    All space characters are dropped, the text is split on commas, the
    first two fields are swapped and a third field ``'0'`` is added; any
    further input fields are ignored.

    NOTE(review): presumably the input is ``"lat, lon"`` and the result is
    the KML ``lon,lat,alt`` order - confirm against the spreadsheet.

    Raises:
        IndexError: If the text contains no comma.
    """
    fields = text_location.replace(' ', '').split(',')
    first, second = fields[0], fields[1]
    return ','.join((second, first, '0'))
def map_dict_element_to_placemark_xml(element: dict):
    """Build a KML ``Placemark`` element from one mapped record.

    Args:
        element: Dictionary with the keys ``'name'``, ``'description'`` and
            ``'location'``; their values become the text of the matching
            child elements.

    Returns:
        A new ``Placemark`` element with ``name``, ``description``,
        ``styleUrl`` (always ``#icon-1603-E65100``) and ``Point`` children,
        where ``Point`` wraps a single ``coordinates`` element.
    """
    placemark = xmltree.Element('{http://www.opengis.net/kml/2.2}Placemark')

    # Plain text children, emitted in this order.
    text_children = (
        ('{http://www.opengis.net/kml/2.2}name', element['name']),
        ('{http://www.opengis.net/kml/2.2}description', element['description']),
        ('{http://www.opengis.net/kml/2.2}styleUrl', '#icon-1603-E65100'),
    )
    for tag, text in text_children:
        xmltree.SubElement(placemark, tag).text = text

    point = xmltree.SubElement(placemark, '{http://www.opengis.net/kml/2.2}Point')
    coordinates = xmltree.SubElement(point, '{http://www.opengis.net/kml/2.2}coordinates')
    coordinates.text = element['location']
    return placemark
def main(xlsx_path='data/housing.xlsx',
         kmz_path='data/Acoomodations.kmz',
         output_path='testoutput.kml'):
    """Rebuild the placemarks of a KML document from a spreadsheet.

    Rows are loaded from the spreadsheet and turned into ``Placemark``
    elements. The ``doc.kml`` of the KMZ archive has its current placemarks
    removed and the new ones appended, and the result is written to
    ``output_path`` as tab-indented XML.

    Args:
        xlsx_path: Spreadsheet to read the rows from.
        kmz_path: KMZ archive whose ``doc.kml`` is used as the base document.
        output_path: Destination of the generated KML file.

    Raises:
        SystemExit: If the KMZ archive could not be loaded.
    """
    dict_elements = map_dit(load_xlsx(xlsx_path))
    placemarks = [map_dict_element_to_placemark_xml(dict_element) for dict_element in dict_elements]

    kmz_xml = load_kmz(kmz_path)
    if kmz_xml is None:
        # load_kmz signals failure with None; stop with a clear message
        # instead of an AttributeError further down.
        raise SystemExit(f'Could not load a KML document from {kmz_path}')
    kmz_xml = remove_all_current_placemarkers(kmz_xml)
    kmz_xml = append_placemarks_to_xml(kmz_xml, placemarks)

    # Serialize with the KML namespace as the default one (no ns0: prefixes).
    xmltree.register_namespace("", 'http://www.opengis.net/kml/2.2')
    rough_string = xmltree.tostring(kmz_xml, 'utf-8')
    content = minidom.parseString(rough_string).toprettyxml(indent="\t")

    # Open the file only once the content exists, so a serialization error
    # does not leave a truncated file. The XML declaration carries no
    # encoding, which means UTF-8, so write UTF-8 explicitly rather than
    # the platform default.
    with open(output_path, "w", encoding="utf-8") as file:
        file.write(content)


if __name__ == '__main__':
    main()
Loading…
Cancel
Save