From 157646f497018aeb2fe3f72918ca644644811456 Mon Sep 17 00:00:00 2001 From: InoriShio <126560588+InoriShio@users.noreply.github.com> Date: Thu, 26 Feb 2026 13:46:59 +0100 Subject: [PATCH] Add files via upload --- config.json | 9 ++ xlsx_to_eml.py | 352 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 361 insertions(+) create mode 100644 config.json create mode 100644 xlsx_to_eml.py diff --git a/config.json b/config.json new file mode 100644 index 0000000..e68efbf --- /dev/null +++ b/config.json @@ -0,0 +1,9 @@ +{ + "gemeente_naam": "name", + "gemeente_code": "code", + "election_date": "YYYY-MM-DD", + "election_subcategory": "GR2", + "number_of_seats": 23, + "preference_threshold": 25, + "nomination_date": "YYYY-MM-DD" +} diff --git a/xlsx_to_eml.py b/xlsx_to_eml.py new file mode 100644 index 0000000..76eebf4 --- /dev/null +++ b/xlsx_to_eml.py @@ -0,0 +1,352 @@ +#!/usr/bin/env python3 + +import json +import re +import os +import sys +from datetime import datetime +from pathlib import Path + +# Check for openpyxl +try: + from openpyxl import load_workbook +except ImportError: + print("Error: openpyxl is not installed.") + print("Please install it by running:") + print(" pip install openpyxl") + sys.exit(1) + +# Check for tkinter +try: + import tkinter as tk + from tkinter import filedialog, messagebox +except ImportError: + print("Error: tkinter is not available.") + print("Please ensure Python was installed with tkinter support.") + sys.exit(1) + +def load_config(config_path: str) -> dict: + """Load configuration from JSON file.""" + try: + with open(config_path, 'r', encoding='utf-8') as f: + return json.load(f) + except FileNotFoundError: + print(f"Error: Configuration file not found: {config_path}") + print("Please create a config.json file with the following structure:") + print(json.dumps({ + "gemeente_naam": "YourMunicipality", + "gemeente_code": "0000", + "election_date": "2026-03-18", + "election_subcategory": "GR2", + "number_of_seats": 29, + "preference_threshold": 25, + "nomination_date": "2026-02-02" + }, indent=2)) + sys.exit(1) + except json.JSONDecodeError as e: + print(f"Error: Invalid JSON in configuration file: {e}") + sys.exit(1) + + +def find_column_index(headers: list, search_terms: list) -> int: + """Find column index by searching for terms in headers (case-insensitive).""" + for idx, header in enumerate(headers): + if header: + header_lower = str(header).lower() + for term in search_terms: + if term.lower() in header_lower: + return idx + return -1 + + +def parse_address(address: str) -> tuple: + if not address: + return ("", "") + + address = str(address).strip() + + # Regex pattern: match everything up to the last space followed by a number (with optional suffix) + pattern = r'^(.*?)\s+(\d+.*)$' + match = re.match(pattern, address) + + if match: + street = match.group(1).strip() + number = match.group(2).strip() + return (street, number) + + # If no match, return the whole address as street name + return (address, "") + + +def read_excel_data(file_path: str) -> list: + wb = load_workbook(filename=file_path, read_only=True, data_only=True) + ws = wb.active + + # Get headers from first row + headers = [cell.value for cell in ws[1]] + + # Find column indices + # Note: 'naam stembureau' must come before 'naam' to match the correct column + # when headers are like "Nummer stembureau" and "Naam stembureau" + nummer_col = find_column_index(headers, ['nummer', 'nr', 'number', 'id']) + naam_col = find_column_index(headers, ['naam stembureau', 'naam', 'name']) + straat_col = find_column_index(headers, ['straat', 'street', 'adres', 'address', 'huisnummer']) + + if nummer_col == -1: + raise ValueError("Could not find 'nummer' column in Excel file. " + "Please ensure there is a column with 'nummer' in the header.") + if naam_col == -1: + raise ValueError("Could not find 'naam' column in Excel file. " + "Please ensure there is a column with 'naam' in the header.") + if straat_col == -1: + raise ValueError("Could not find 'straat' column in Excel file. " + "Please ensure there is a column with 'straat' or 'adres' in the header.") + + print(f"Found columns: nummer={headers[nummer_col]} (col {nummer_col}), naam={headers[naam_col]} (col {naam_col}), straat={headers[straat_col]} (col {straat_col})") + + # Read data rows + polling_stations = [] + for row_idx, row in enumerate(ws.iter_rows(min_row=2, values_only=True), start=2): + if len(row) > max(nummer_col, naam_col, straat_col): + nummer = row[nummer_col] + naam = row[naam_col] + straat = row[straat_col] + + # Debug: print first row to check column mapping + if row_idx == 2: + print(f"DEBUG Row 2: nummer={nummer}, naam={naam}, straat={straat}") + print(f"DEBUG Full row: {row}") + + # Skip empty rows + if nummer is None and naam is None: + continue + + # Convert nummer to int if possible + try: + nummer = int(nummer) + except (ValueError, TypeError): + print(f"Warning: Row {row_idx} has invalid nummer '{nummer}', skipping.") + continue + + polling_stations.append({ + 'nummer': nummer, + 'naam': str(naam) if naam else f"Stembureau {nummer}", + 'straat': str(straat) if straat else "" + }) + + wb.close() + return polling_stations + +def generate_eml_xml(config: dict, polling_stations: list) -> str: + """Generate EML_NL 110b XML content (polling stations list).""" + + gemeente_naam = config['gemeente_naam'] + gemeente_code = config['gemeente_code'] + election_date = config['election_date'] + election_subcategory = config.get('election_subcategory', 'GR2') + nomination_date = config.get('nomination_date', '') + + # Generate transaction ID + transaction_id = f"GR{election_date[:4]}{gemeente_code}" + + # Current datetime for CreationDateTime + creation_datetime = datetime.now().strftime('%Y-%m-%dT%H:%M:%S') + + # If no nomination date provided, use election date as fallback + if not nomination_date: + nomination_date = election_date + + # Start building XML - 110b format for polling stations + xml_parts = [ + '', + '', + f' {transaction_id}', + ' ', + f' {gemeente_naam}', + ' ', + ' ', + f' {creation_datetime}', + ' ', + ' ', + ' ', + f' ', + f' Gemeenteraadsverkiezingen {gemeente_naam}', + ' GR', + f' {election_subcategory}', + f' {gemeente_naam}', + f' {election_date}', + f' {nomination_date}', + ' ', + ' ', + ' ', + ' ', + f' {gemeente_naam}', + ' ', + ' SPV', + ' 0', + ] + + # Add polling stations as PollingPlace elements inside Contest + for ps in polling_stations: + nummer = ps['nummer'] # This is the position/stemgebiedsnr + naam = escape_xml(ps['naam']) # This is the name of the stembureau + + xml_parts.extend([ + ' ', + ' ', + '
', + ' ', + f' {naam}', + ' ', + '
', + f' 0', + '
', + '
', + ]) + + # Close XML + xml_parts.extend([ + '
', + '
', + '
', + '
', + ]) + + return '\n'.join(xml_parts) + + +def escape_xml(text: str) -> str: + """Escape special XML characters.""" + if not text: + return "" + text = str(text) + text = text.replace('&', '&') + text = text.replace('<', '<') + text = text.replace('>', '>') + text = text.replace('"', '"') + text = text.replace("'", ''') + return text + + +def main(): + """Main entry point.""" + print("=" * 60) + print("Excel to EML_NL Converter") + print("Converts polling station data to EML_NL 110b XML format") + print("=" * 60) + print() + + # Determine config path (same directory as script) + script_dir = Path(__file__).parent + config_path = script_dir / 'config.json' + + # Load configuration + print(f"Loading configuration from: {config_path}") + config = load_config(str(config_path)) + print(f" Gemeente: {config['gemeente_naam']} (code: {config['gemeente_code']})") + print(f" Election date: {config['election_date']}") + print(f" Election type: {config.get('election_subcategory', 'GR2')}") + print() + + # Create hidden Tk root window + root = tk.Tk() + root.withdraw() + + # Show file open dialog + print("Opening file dialog to select Excel file...") + input_file = filedialog.askopenfilename( + title="Select Excel file with polling station data", + filetypes=[ + ("Excel files", "*.xlsx"), + ("All files", "*.*") + ] + ) + + if not input_file: + print("No file selected. Exiting.") + sys.exit(0) + + print(f"Selected file: {input_file}") + print() + + # Read Excel data + print("Reading Excel file...") + try: + polling_stations = read_excel_data(input_file) + except Exception as e: + messagebox.showerror("Error", f"Failed to read Excel file:\n{e}") + print(f"Error reading Excel file: {e}") + sys.exit(1) + + print(f"Found {len(polling_stations)} polling stations") + print() + + # Show polling stations + print("Polling stations found:") + for ps in polling_stations[:5]: # Show first 5 + print(f" {ps['nummer']:3d}: {ps['naam'][:40]:<40} | {ps['straat']}") + if len(polling_stations) > 5: + print(f" ... and {len(polling_stations) - 5} more") + print() + + # Generate XML + print("Generating EML_NL XML...") + xml_content = generate_eml_xml(config, polling_stations) + + # Suggest output filename + gemeente_clean = config['gemeente_naam'].lower().replace(' ', '_') + default_filename = f"stembureaus_{gemeente_clean}.eml.xml" + default_dir = Path(input_file).parent + + # Show save dialog + print("Opening save dialog...") + output_file = filedialog.asksaveasfilename( + title="Save EML.xml file as", + initialdir=str(default_dir), + initialfile=default_filename, + defaultextension=".eml.xml", + filetypes=[ + ("EML XML files", "*.eml.xml"), + ("XML files", "*.xml"), + ("All files", "*.*") + ] + ) + + if not output_file: + print("No output file selected. Exiting.") + sys.exit(0) + + # Write output file + print(f"Writing output to: {output_file}") + try: + with open(output_file, 'w', encoding='utf-8') as f: + f.write(xml_content) + except Exception as e: + messagebox.showerror("Error", f"Failed to write output file:\n{e}") + print(f"Error writing output file: {e}") + sys.exit(1) + + print() + print("=" * 60) + print("SUCCESS!") + print(f"Created: {output_file}") + print(f"Polling stations: {len(polling_stations)}") + print("=" * 60) + + # Show success message + messagebox.showinfo( + "Success", + f"EML.xml file created successfully!\n\n" + f"Output: {output_file}\n" + f"Polling stations: {len(polling_stations)}" + ) + + root.destroy() + + +if __name__ == "__main__": + main()