LoD1 OSM Import¶

This notebook provides the logic to import addresses and house shapes from German LoD1 cadastre data.

LoD1 model source: https://www.lvermgeo.sachsen-anhalt.de/de/kostenfreie_geobasisdaten_lvermgeo.html.

License/permission grant: https://wiki.openstreetmap.org/wiki/DE:Permissions/Geobasisdaten_Sachsen-Anhalt.

OSM Import guidelines: https://wiki.openstreetmap.org/wiki/Import/Guidelines

General information on OSM imports: https://wiki.openstreetmap.org/wiki/Import

In [1]:
# LoD1 CityGML model:
#url = "https://www.geodatenportal.sachsen-anhalt.de/gfds_webshare/download/LVermGeo/Geodatenportal/Online-Bereitstellung-LVermGeo/3D/LoD1.zip"
In [2]:
import configparser, numpy as np, os, pandas, requests, xml.sax, zipfile
from io import StringIO
import geopandas, pyproj, shapely.geometry, shapely.ops, shapely.validation
import osmnx as ox
/home/hw/geo/__pycache__/geo.env/lib/python3.10/site-packages/geopandas/_compat.py:112: UserWarning: The Shapely GEOS version (3.10.3-CAPI-1.16.1) is incompatible with the GEOS version PyGEOS was compiled with (3.10.1-CAPI-1.16.0). Conversions between both will be slow.
  warnings.warn(
In [3]:
## Source: Dokumentation zur Modellierung der Geoinformationen des amtlichen Vermessungswesens (GeoInfoDok)
## https://www.adv-online.de/icc/extdeu/nav/a63/binarywriterservlet?imgUid=9201016e-7efa-8461-e336-b6951fa2e0c9&uBasVariant=11111111-1111-1111-1111-111111111111
ATKIS_function = {   
    "31001_1000": { "building": "yes", "building:use": "residential" },  # Wohngebäude 
    "31001_1010": { "building": "yes", "building:use": "residential" },  # Wohnhaus 
    "31001_1011": { "building": "house", "building:use": "residential" },  # Einzelhaus 
    "31001_1012": { "building": "semidetached_house", "building:use": "residential" },  # Doppelhaus 
    "31001_1013": { "building": "terrace", "building:use": "residential" },  # Reihenhaus 
    "31001_1014": { "building": "yes", "building:use": "residential" },  # Gruppenhaus 
    "31001_1015": { "building": "apartments", "building:use": "residential" },  # Wohnblock 
    "31001_1016": { "building": "apartments", "building:use": "residential" },  # Hochhaus 
    "31001_1020": { "building": "dormitory", "building:use": "residential" },  # Wohnheim 
    "31001_1022": { "building": "yes", "building:use": "residential", "amenity": "nursing_home" },  # Seniorenheim 
    "31001_1023": { "building": "dormitory", "building:use": "residential", "dormitory": "nurse" },  # Schwesternwohnheim 
    "31001_1024": { "building": "dormitory", "building:use": "residential", "dormitory": "student" },  # Studenten-, Schülerwohnheim 
    "31001_1100": { "building": "yes", "building:use": "residential" },  # Gemischt genutztes Gebäude mit Wohnen
    "31001_1120": { "building": "yes", "building:use": "residential;retail;commercial" },  # Wohngebäude mit Handel und Dienstleistungen
    "31001_1121": { "building": "yes", "building:use": "residential;office" },  # Wohn- und Verwaltungsgebäude 
    "31001_1122": { "building": "yes", "building:use": "residential;office" },  # Wohn- und Bürogebäude 
    "31001_1123": { "building": "yes", "building:use": "residential;retail" },  # Wohn- und Geschäftsgebäude 
    "31001_1130": { "building": "yes", "building:use": "residential;industrial" },  # Wohngebäude mit Gewerbe und Industrie 
    "31001_1131": { "building": "yes", "building:use": "residential;commercial" },  # Wohn- und Betriebsgebäude
    "31001_1220": { "building": "yes", "building:use": "residential;agricultural" },  # Land- und forstwirtschaftliches Wohn- und Betriebsgebäude
    "31001_1221": { "building": "farm", "building:use": "residential;agricultural" },  # Bauernhaus
    "31001_1222": { "building": "yes", "building:use": "residential;agricultural" },  # Wohn- und Wirtschaftsgebäude
    "31001_1223": { "building": "yes", "office": "forestry", "building:use": "residential;forestry" },  # Forsthaus
    "31001_2000": { "building": "yes", "building:use": "industrial;commercial" },  # Gebäude für Wirtschaft oder Gewerbe 
    "31001_2010": { "building": "yes", "building:use": "commercial" },  # Gebäude für Handel und Dienstleistungen 
    "31001_2020": { "building": "yes", "building:use": "office" },  # Bürogebäude 
    "31001_2030": { "building": "yes", "building:use": "commercial", "amenity": "bank" },  # Kreditinstitut 
    "31001_2040": { "building": "yes", "building:use": "office", "office": "insurance" },  # Versicherung 
    "31001_2050": { "building": "yes", "building:use": "commercial" },  # Geschäftsgebäude 
    "31001_2052": { "building": "yes", "building:use": "retail", "shop": "mall" },  # Einkaufszentrum 
    "31001_2053": { "building": "yes", "building:use": "retail", "amenity": "marketplace" },  # Markthalle 
    "31001_2054": { "building": "yes", "building:use": "retail", "shop": "fixme" },  # Laden 
    "31001_2055": { "building": "yes", "building:use": "retail", "shop": "kiosk" },  # Kiosk 
    "31001_2060": { "building": "yes", "building:use": "exhibition", "amenity": "exhibition_hall" },  # Messehalle 
    "31001_2070": { "building": "yes", "building:use": "hotel" },  # Gebäude für Beherbergung 
    "31001_2071": { "building": "yes", "building:use": "hotel", "tourism": "hotel" },  # Hotel, Motel, Pension 
    "31001_2072": { "building": "yes", "building:use": "hotel", "tourism": "hostel" },  # Jugendherberge 
    "31001_2073": { "building": "yes", "tourism": "wilderness_hut", "amenity": "shelter" },  # Hütte (mit Übernachtungsmöglichkeit)
    "31001_2074": { "building": "yes" },  # Campingplatzgebäude 
    "31001_2080": { "building": "yes", "building:use": "foodservice" },  # Gebäude für Bewirtung 
    "31001_2081": { "building": "yes", "building:use": "foodservice", "amenity": "restaurant" },  # Gaststätte, Restaurant 
    "31001_2082": { "building": "hut", "building:use": "shelter" },  # Hütte (ohne Übernachtungsmöglichkeit) 
    "31001_2083": { "building": "yes", "building:use": "foodservice", "amenity": "canteen" },  # Kantine 
    "31001_2090": { "building": "yes", "building:use": "leisure", "amenity": "leisure" },  # Freizeit- und Vergnügungsstätte 
    "31001_2100": { "building": "yes", "building:use": "industrial" },  # Gebäude für Gewerbe und Industrie 
    "31001_2110": { "building": "yes", "building:use": "industrial" },  # Produktionsgebäude 
    "31001_2111": { "building": "yes", "building:use": "industrial" },  # Fabrik 
    "31001_2112": { "building": "yes", "building:use": "industrial" },  # Betriebsgebäude 
    "31001_2120": { "building": "yes", "building:use": "industrial" },  # Werkstatt 
    "31001_2130": { "building": "yes", "building:use": "service", "amenity": "fuel" },  # Tankstelle 
    "31001_2131": { "building": "yes", "building:use": "service", "amenity": "car_wash" },  # Waschstraße, Waschanlage, Waschhalle 
    "31001_2143": { "building": "warehouse", "building:use": "storage" },  # Lagerhalle, Lagerschuppen, Lagerhaus 
    "31001_2150": { "building": "yes", "building:use": "industrial" },  # Speditionsgebäude 
    "31001_2160": { "building": "yes", "building:use": "research", "office": "research" },  # Gebäude für Forschungszwecke 
    "31001_2170": { "building": "yes", "building:use": "industrial" },  # Gebäude für Grundstoffgewinnung 
    "31001_2180": { "building": "yes", "building:use": "gathering", "amenity": "social_facility" },  # Gebäude für betriebliche Sozialeinrichtung 
    "31001_2200": { "building": "yes", "building:use": "industrial" },  # Sonstiges Gebäude für Gewerbe und Industrie 
    "31001_2211": { "building": "yes", "man_made": "windmill" },  # Windmühle
    "31001_2212": { "building": "yes", "man_made": "watermill" },  # Wassermühle
    "31001_2213": { "building": "yes", "man_made": "watermill" },  # Schöpfwerk
    "31001_2220": { "building": "yes", "man_made": "monitoring_station", "monitoring:weather": "yes" },  # Wetterstation
    "31001_2320": { "building": "yes", "building:use": "industrial;residential" },  # Gebäude für Gewerbe und Industrie mit Wohnen 
    "31001_2410": { "building": "yes", "building:use": "civic" },  # Betriebsgebäude für Straßenverkehr 
    "31001_2411": { "building": "yes", "building:use": "civic" },  # Straßenmeisterei 
    "31001_2420": { "building": "yes", "building:use": "railway" },  # Betriebsgebäude für Schienenverkehr 
    "31001_2421": { "building": "yes", "building:use": "railway" },  # Bahnwärterhaus 
    "31001_2422": { "building": "yes", "building:use": "railway" },  # Lokschuppen, Wagenhalle 
    "31001_2440": { "building": "yes", "seamark:building:function": "fixme" },  # Betriebsgebäude für Schiffsverkehr 
    "31001_2460": { "building": "yes", "building:use": "parking", "amenity": "parking" },  # Gebäude zum Parken 
    "31001_2461": { "building": "yes", "building:use": "parking", "parking": "multi-storey", "amenity": "parking" },  # Parkhaus 
    "31001_2463": { "building": "garage", "amenity": "parking" },  # Garage
    "31001_2465": { "building": "garage", "parking": "underground", "amenity": "parking" },  # Tiefgarage   
    "31001_2501": { "building": "yes", "building:use": "industrial", "power": "fixme" },  # Gebäude zur Energieversorgung 
    "31001_2510": { "building": "yes", "building:use": "industrial", "man_made": "water_works" },  # Gebäude zur Wasserversorgung 
    "31001_2512": { "building": "yes", "building:use": "industrial", "man_made": "pumping_station" },  # Pumpstation 
    "31001_2513": { "building": "yes", "man_made": "reservoir_covered" },  # Wasserbehälter
    "31001_2514": { "building": "yes", "man_made": "water_tower" },  # Wasserturm
    "31001_2520": { "building": "yes", "building:use": "industrial", "power": "fixme" },  # Gebäude zur Elektrizitätsversorgung 
    "31001_2522": { "building": "yes", "building:use": "industrial", "power": "station" },  # Umspannwerk
    "31001_2523": { "building": "yes", "building:use": "industrial", "power": "sub_station" },  # Umformer
    "31001_2540": { "building": "yes", "building:use": "industrial" },  # Gebäude für Fernmeldewesen 
    "31001_2571": { "building": "yes", "building:use": "industrial", "man_made": "gas_plant" },  # Gaswerk 
    "31001_2580": { "building": "yes", "building:use": "industrial", "man_made": "heat_plant" },  # Heizwerk 
    "31001_2590": { "building": "yes", "building:use": "industrial" },  # Gebäude zur Versorgungsanlage 
    "31001_2610": { "building": "yes", "building:use": "industrial", "man_made": "wastewater_plant" },  # Gebäude zur Abwasserbeseitigung 
    "31001_2611": { "building": "yes", "building:use": "industrial", "man_made": "wastewater_plant" },  # Gebäude der Kläranlage 
    "31001_2620": { "building": "yes", "building:use": "industrial" },  # Gebäude zur Abfallbehandlung 
    "31001_2700": { "building": "farm_auxiliary", "building:use": "agricultural" },  # Gebäude für Land- und Forstwirtschaft 
    "31001_2720": { "building": "farm_auxiliary", "building:use": "agricultural" },  # Land- und forstwirtschaftliches Betriebsgebäude 
    "31001_2728": { "building": "yes", "building:use": "equestrian" },  # Reithalle 
    "31001_2740": { "building": "greenhouse", "building:use": "agricultural" },  # Gewächshaus 
    "31001_3000": { "building": "yes", "building:use": "public", "amenity": "public_building" },  # Gebäude für öffentliche Zwecke 
    "31001_3001": { "building": "yes", "building:use": "gathering", "club": "fixme" },  # Verein, Verband 
    "31001_3010": { "building": "yes", "building:use": "government" },  # Verwaltungsgebäude     
    "31001_3011": { "building": "yes", "building:use": "government", "office": "government" },  # Parlament
    "31001_3012": { "building": "yes", "building:use": "government", "amenity": "townhall" },  # Rathaus
    "31001_3013": { "building": "yes", "building:use": "service", "amenity": "post_office" },  # Post 
    "31001_3014": { "building": "yes", "building:use": "civic", "amenity": "customs" },  # Zollamt 
    "31001_3015": { "building": "yes", "building:use": "civic", "amenity": "courthouse" },  # Gericht 
    "31001_3017": { "building": "yes", "building:use": "government" },  # Stadtverwaltung 
    "31001_3019": { "building": "yes", "building:use": "civic", "office": "tax" },  # Finanzamt 
    "31001_3020": { "building": "yes", "building:use": "education" },  # Gebäude für Bildung und Forschung 
    "31001_3021": { "building": "yes", "building:use": "education", "amenity": "school" },  # Allgemein bildende Schule 
    "31001_3022": { "building": "yes", "building:use": "education", "amenity": "school" },  # Berufsbildende Schule 
    "31001_3023": { "building": "yes", "building:use": "education", "amenity": "university" },  # Hochschulgebäude (Fachhochschule, Universität) 
    "31001_3024": { "building": "yes", "building:use": "research", "office": "research" },  # Forschungsinstitut 
    "31001_3030": { "building": "yes", "building:use": "cultural", "amenity": "events_venue" },  # Gebäude für kulturelle Zwecke 
    "31001_3031": { "building": "castle", "historic": "castle" },  # Schloss 
    "31001_3032": { "building": "yes", "building:use": "cultural", "amenity": "theatre" },  # Theater, Oper 
    "31001_3033": { "building": "yes", "building:use": "cultural", "amenity": "concert_hall" },  # Konzertgebäude 
    "31001_3034": { "building": "yes", "building:use": "cultural", "tourism": "museum" },  # Museum 
    "31001_3035": { "building": "yes", "amenity": "studio" },  # Rundfunk, Fernsehen (Studio)
    "31001_3036": { "building": "yes", "building:use": "cultural" },  # Veranstaltungsgebäude 
    "31001_3037": { "building": "yes", "building:use": "education", "amenity": "library" },  # Bibliothek, Bücherei 
    "31001_3038": { "building": "yes", "building:use": "fixme", "historic": "castle" },  # Burg, Festung 
    "31001_3040": { "building": "yes", "building:use": "religious", "amenity": "place_of_worship" },  # Gebäude für religiöse Zwecke 
    "31001_3041": { "building": "church", "building:use": "religious", "amenity": "place_of_worship" },  # Kirche 
    "31001_3042": { "building": "synagogue", "building:use": "religious", "amenity": "place_of_worship" },  # Synagoge 
    "31001_3043": { "building": "chapel", "building:use": "religious", "amenity": "place_of_worship" },  # Kapelle 
    "31001_3044": { "building": "yes", "building:use": "religious", "amenity": "parish_hall" },  # Gemeindehaus 
    "31001_3045": { "building": "yes", "building:use": "religious", "amenity": "place_of_worship" },  # Gotteshaus
    "31001_3050": { "building": "yes", "building:use": "medical" },  # Gebäude für Gesundheitswesen 
    "31001_3051": { "building": "yes", "building:use": "medical", "amenity": "hospital" },  # Krankenhaus 
    "31001_3052": { "building": "yes", "building:use": "medical", "amenity": "sanatorium" },  # Heilanstalt, Pflegeanstalt, Pflegestation 
    "31001_3053": { "building": "yes", "building:use": "medical", "amenity": "doctors" },  # Ärztehaus, Poliklinik 
    "31001_3060": { "building": "yes", "building:use": "gathering", "amenity": "social_facility" },  # Gebäude für soziale Zwecke 
    "31001_3061": { "building": "yes", "building:use": "gathering", "amenity": "youth_centre" },  # Jugendfreizeitheim 
    "31001_3062": { "building": "yes", "building:use": "gathering", "amenity": "community_centre" },  # Freizeit-, Vereinsheim, Dorfgemeinschafts-, Bürgerhaus 
    "31001_3064": { "building": "yes", "building:use": "residential" },  # Obdachlosenheim 
    "31001_3065": { "building": "yes", "building:use": "education", "amenity": "kindergarten" },  # Kinderkrippe, Kindergarten, Kindertagesstätte 
    "31001_3070": { "building": "yes", "building:use": "civic" },  # Gebäude für Sicherheit und Ordnung 
    "31001_3071": { "building": "yes", "building:use": "civic", "amenity": "police" },  # Polizei 
    "31001_3072": { "building": "yes", "building:use": "civic", "amenity": "fire_station" },  # Feuerwehr
    "31001_3073": { "building": "yes", "building:use": "military", "military": "barracks" },  # Kaserne
    "31001_3075": { "building": "yes", "amenity": "prison" },  # Justizvollzugsanstalt
    "31001_3080": { "building": "yes" },  # Friedhofsgebäude
    "31001_3082": { "building": "yes", "amenity": "crematorium" },  # Krematorium
    "31001_3090": { "building": "yes" },  # Empfangsgebäude
    "31001_3091": { "building": "yes", "building:use": "railway", "railway": "station" },  # Bahnhofsgebäude
    "31001_3094": { "building": "yes", "building:use": "railway", "railway": "subway_entrance" },  # Gebäude zum U-Bahnhof
    "31001_3100": { "building": "yes", "building:use": "public;residential" },  # Gebäude für öffentliche Zwecke mit Wohnen
    "31001_3200": { "building": "yes", "building:use": "leisure", "amenity": "leisure" },  # Gebäude für Erholungszwecke
    "31001_3210": { "building": "sports_hall", "building:use": "sport", "leisure": "sports_hall" },  # Gebäude für Sportzwecke
    "31001_3211": { "building": "sports_hall", "building:use": "sport", "leisure": "sports_hall" },  # Sport-, Turnhalle
    "31001_3212": { "building": "yes", "building:use": "sport", "leisure": "sports_centre" },  # Gebäude zum Sportplatz
    "31001_3220": { "building": "yes", "building:use": "sport", "sport": "swimming" },  # Badegebäude
    "31001_3221": { "building": "sports_hall", "building:use": "sport", "sport": "swimming", "leisure": "sports_hall" },  # Hallenbad
    "31001_3222": { "building": "yes", "building:use": "sport", "sport": "swimming", "leisure": "sports_centre" },  # Gebäude im Freibad
    "31001_3223": { "building": "yes", "building:use": "sport", "sport": "swimming", "leisure": "sports_centre" },  # Gebäude im Kombibad
    "31001_3230": { "building": "yes", "building:use": "sport", "leisure": "sports_centre" },  # Gebäude im Stadion
    "31001_3241": { "building": "yes", "building:use": "medical", "sport": "swimming" },  # Badegebäude für medizinische Zwecke
    "31001_3242": { "building": "yes", "building:use": "medical", "amenity": "sanatorium" },  # Sanatorium
    "31001_3260": { "building": "yes", "tourism": "zoo" },  # Gebäude im Zoo 
    "31001_3270": { "building": "yes", "leisure": "garden" },  # Gebäude im botanischen Garten 
    "31001_3281": { "building": "yes", "building:use": "shelter", "amenity": "shelter" },  # Schutzhütte
    "31001_9998": { "building": "yes" },  # Nach Quellenlage nicht zu spezifizieren
}

class LoD1StreamHandler(xml.sax.handler.ContentHandler):
    def __init__(self):
        super().__init__()
        self.element_stack = []
        self.envelope = {}
        self.elements = []
        self.polygons = []
        self.content = ""

    def transformCoords(self, x, ndims = 3):
        """ Transform coordinates `x` according to envelope transform specification """
        p = np.array(x.split()).astype(float).reshape(-1, ndims)
        return np.array(self.transformer.transform(p[:,0], p[:,1], p[:,2])).T
            
    def startElement(self, name, attrs):
        self.element_stack.append({"name": name, "attrs": attrs})
        if name == "bldg:Building":
            self.elements.append({
                # Useful for identification or debugging? TODO: possibly remove:
                "source:geometry": self.envelope["source"]
            })
        if name == "gml:Envelope":
            self.envelope["srs"] = attrs["srsName"]
            if attrs["srsName"] == "urn:adv:crs:ETRS89_UTM32*DE_DHHN2016_NH":
                ## As of 2017/18 all ATKIS/INSPIRE is based on ETRS89_UTM32 + DHHN2016 height
                self.transformer = pyproj.Transformer.from_crs("EPSG:25832", "EPSG:4326")
            else:
                raise Exception("Unexpected SRS coordinate frame. Please specify transform.")
        if name == "gml:posList":
            assert attrs["srsDimension"] == "3", "FIXME: only 3D coordinates supported for now"
        if name == "gml:Polygon":
            self.element_stack[-1]["interior"] = []

    def endElement(self, name):
        #print(name)
        self.content = self.content.strip()
        this = self.element_stack[-1]
        parent = self.element_stack[-2] if len(self.element_stack) > 1 else None
        assert name == this["name"], "inconsistent XML parser state"
        tag_mapping = {
            "xal:ThoroughfareName": ("addr:street", str),
            "xal:ThoroughfareNumber": ("addr:housenumber", str),
            "xal:LocalityName": ("addr:city", str),
            "xal:CountryName": ("addr:country", str),
            "bldg:measuredHeight": ("height", float)
        }
        if name in tag_mapping.keys():
            osm_key, osm_type = tag_mapping[name]
            value = self.content
            if name == "xal:LocalityName" and value.endswith(", Stadt"):
                value = value[:-7]
            if name == "xal:LocalityName" and value.endswith(", Landeshauptstadt"):
                value = value[:-18]
            if name == "xal:CountryName" and value == "Deutschland":
                value = "DE"
            self.elements[-1][osm_key] = osm_type(value)
        if name == "bldg:function":
            global ATKIS_function
            k = self.content
            assert k in ATKIS_function, f"Unknown building function {k}, please add ATKIS function type"
            #self.elements[-1]["ATKIS"] = k
            self.elements[-1].update(ATKIS_function[k])
            if "building" not in self.elements[-1]:
                self.elements[-1]["building"] = "yes"
            self.polygons = []
        if name == "gml:posList":
            p = self.transformCoords(self.content)
            parent["posList"] = p[:,0:2]
            self.elements[-1]["ele:wgs84"] = p[:,2].min()
        if name == "gml:LinearRing":
            l = shapely.geometry.LineString(this["posList"])
            parent["LinearRing"] = shapely.validation.make_valid(l)
        if name == "gml:exterior":
            if "LinearRing" in this:
                parent["exterior"] = this["LinearRing"]
        if name == "gml:interior":
            if "LinearRing" in this and this["LinearRing"].length > 0:
                parent["interior"].append(this["LinearRing"])
        if name == "gml:Polygon":
            p = None
            try:
                if this["exterior"].length > 0:
                    p = shapely.geometry.Polygon(this["exterior"], this["interior"])
                    p = shapely.validation.make_valid(p)
                    assert p.is_valid, "Failed to validate polygon"
                    p = p.simplify(1e-9)
                    if p.area > 0:
                        self.polygons.append(p)
            except Exception as e:
                msg = "*** Failed to create polygon: " + str(e)
                msg += "\n*** " + str(self.elements[-1])
                msg += "\n*** shell\n" + str(this["exterior"])
                msg += "\n*** holes\n" + str(this["interior"])
                if p is not None:
                    msg += "\n*** Exterior\n" + str(list(p.exterior.coords))
                    msg += "\n*** Interior\n" + str(list(p.interiors))
                    msg += "\n*** Explanation: " + shapely.validation.explain_validity(p)
                raise Exception(msg)
        if name == "core:creationDate":
            self.elements[-1]["source:geometry:gml:date"] = self.content
        if name == "core:name":
            self.elements[-1]["source:geometry:gml:id"] = self.content
        if name == "bldg:Building":
            if len(self.polygons) > 1:
                for i in range(len(self.polygons)):
                    # eps = 1e-8 is approx 25cm
                    eps = 1e-8
                    # slightly widen geometry so that small gaps don't cause slivers when doing union:
                    self.polygons[i] = self.polygons[i].buffer(eps)
                u = shapely.ops.unary_union(self.polygons).buffer(-eps) # shrink to undo prior widening.
            else:
                u = self.polygons[0]
            u = shapely.validation.make_valid(u)
            u = u.simplify(1e-9)
            self.elements[-1]["geometry"] = u
            self.elements[-1]["source:geometry"] += ":" + self.elements[-1]["source:geometry:gml:id"]
            self.elements[-1]["source:geometry"] += " (" + self.elements[-1]["source:geometry:gml:date"] + ")"
            del self.elements[-1]["source:geometry:gml:date"]
            del self.elements[-1]["source:geometry:gml:id"]
            self.polygons = []
        if name == "gml:name":
            if parent["name"] == "core:CityModel":
                self.name = self.content
            elif parent["name"] == "bldg:Building":
                self.elements[-1]["name"] = self.content
            else:
                raise Exception(f"*** EXCEPTION : Found tag gml:name in unexpected scope {parent}")
        if name == "gml:boundedBy":
            self.envelope["dataset"] = self.name
            self.envelope["source"] = "GeoBasis-DE / LVermGeo LSA, dl-de/by-2-0, " + self.envelope["dataset"]
        if name == "gml:lowerCorner":
            self.envelope["lowerCorner"] = self.transformCoords(self.content)
        if name == "gml:upperCorner":
            self.envelope["upperCorner"] = self.transformCoords(self.content)
        self.content = ""
        self.element_stack.pop()

    def characters(self, content):
        self.content += content

class Quadrant:
    def __init__ (self, envelope = {}, elements = [], with_index = False):
        self.envelope = envelope
        self.elements = elements
        if with_index:
            self.build_index()

    def build_index (self):
        #print(f"*** Build index for {len(self.elements)} elements...")
        geoms = []
        for i,obj in enumerate(self.elements):
            geoms.append(coordinates=obj["geometry"])
            #print(i, obj)
            self.index.insert(id=i, coordinates=obj["geometry"].bounds, obj=obj)
        self.index = shapely.strtree.STRtree()

def parse_LoD1 (f):
    handler = LoD1StreamHandler()
    parser = xml.sax.parse(f, handler=handler)
    quadrant = Quadrant(handler.envelope, handler.elements)
    del handler
    del parser
    return quadrant

#%time quadrant = parse_LoD1("LoD1/LoD1_326785740.gml")
#%time quadrant = parse_LoD1("LoD1/LoD1_326865738.gml")
#quadrant.elements
In [4]:
node_id = 0
node_dict = {}

def elements2xml(elements, attrs=""):
    def element2xml(element, attrs=""):
        def geometry2xml (g, tags_xml="", attrs=""):
            global node_id
            global node_dict
            xml = ''
            try:
                points = np.array(g.exterior.coords.xy).T
            except:
                points = np.array(g.coords.xy).T
            for p in points:
                if tuple(p) not in node_dict:
                    node_id -= 1
                    node_dict[tuple(p)] = node_id
                    xml += f'\t<node id="{node_id}" {attrs} lat="{p[0]}" lon="{p[1]}"/>\n'                
            node_id -= 1
            xml += f'\t<way id="{node_id}" {attrs} visible="true">\n'
            for p in points:
                xml +=f'\t\t<nd ref="{node_dict[tuple(p)]}"/>\n'
            xml += tags_xml
            xml += '\t</way>\n'
            return node_id, xml

        def geometrycollection2xml (g, tags_xml = "", attrs=""):
            global node_id
            global node_dict
            xml = ''
            if hasattr(g, "geoms"):
                parts = []
                for p in g.geoms:
                    part_id, part_xml = geometrycollection2xml(p, attrs=attrs)
                    parts.append(part_id)
                    xml += part_xml
                node_id -= 1
                xml += f'\t<relation id="{node_id}" {attrs} visible="true">\n'
                part_id = node_id
                for p in parts:
                    xml += f'\t\t<member type="way" {attrs} ref="{p}" role="outer"/>\n'
                xml += '\t\t<tag k="type" v="multipolygon"/>\n'
                xml += tags_xml
                xml += '\t</relation>\n'
            else:
                part_id, part_xml = geometry2xml(g, tags_xml=tags_xml, attrs=attrs)
                xml += part_xml
            return part_id, xml

        global node_id
        global node_dict
        tags_xml = ''
        for k,v in element.items():
            ignored_tags = ["geometry", "ways", "nodes", "type"]
            if k not in ignored_tags and str(v) != "nan":
                tags_xml += '\t\t<tag k="' + k + '" v="' + str(v) + '"/>\n'
        xml = ''
        if "geometry" in element:
            _, geometry_xml = geometrycollection2xml(element['geometry'], tags_xml=tags_xml, attrs=attrs)
            xml += geometry_xml
        return xml

    if isinstance(elements, geopandas.geodataframe.GeoDataFrame):
        ## transform geopandas dataframes into WGS84, then flip lon-lat into lat-lon to match OSM notation:
        elements = elements.to_crs("EPSG:4326")
        elements['geometry'] = elements['geometry'].map(lambda polygon: shapely.ops.transform(lambda x, y: (y, x), polygon))
        elements = elements.to_dict("records")
    global node_id
    global node_dict
    node_id = 0
    node_dict = {}
    xml = ''
    for e in elements:
        #xml += '\t<!-- gml:id ' + e["gml:id"] + ' -->\n'
        xml += element2xml(e, attrs=attrs)
    return xml

def to_osm (elements):
    osmxml = '<?xml version="1.0" encoding="UTF-8"?>\n'
    osmxml += '<osm version="0.6" generator="LoD1 OSM import">\n'
    #<note>... </note>
    #<meta osm_base="2012-09-20T20\:24\:02Z"/>
    osmxml += elements2xml(elements)
    osmxml += '</osm>\n'
    return osmxml

def to_changeset (create = [], modify = [], delete = [], generator="LoD1 OSM notebook"):
    xml = '<?xml version="1.0" encoding="UTF-8"?>\n'
    xml += f'<osmChange version="0.6" generator="{generator}">\n'
    if len(delete) > 0:
        xml += '<delete if-unused="1">\n'
        xml += elements2xml(delete, attrs='changeset=""')
        xml += '</delete>\n'
    if len(modify) > 0:
        xml += '<modify>\n'
        xml += elements2xml(modify, attrs='changeset=""')
        xml += '</modify>\n'
    if len(create) > 0:
        xml += '<create>\n'
        xml += elements2xml(create, attrs='changeset=""')
        xml += '</create>\n'
    xml += '</osmChange>\n'
    return xml
In [5]:
def list_gmls_in_zip (zip_fname):
    with zipfile.ZipFile(zip_fname) as z:
        return [l for l in z.namelist() if l.endswith(".zip")]

def gmls_in_zip (zip_fname, select_gmls = None):
    with zipfile.ZipFile(zip_fname) as z:
        if select_gmls is None:
            select_gmls = list_gmls_in_zip(zip_fname)
        for i,gml_zipfile in enumerate(select_gmls):
            with zipfile.ZipFile(z.open(gml_zipfile)) as f:
                fnames = f.namelist()
                assert len(fnames) == 1, ""
                print(f"\r*** [{i+1}/{len(select_gmls)}] Parse {zip_fname}:{gml_zipfile}:{fnames[0]}   \r", end="")
                yield f.open(fnames[0])
In [6]:
def nonoverlapping_buildings (df2, df1):
    ## return dataframe with buildings from df2 that do not overlap/intersect with buildings from df1
    ## cf. https://gis.stackexchange.com/questions/418283/find-and-remove-overlapping-polygons-with-geopandas
    df2["savedindex"] = df2.index
    intersecting = df1.sjoin(df2, how="inner")["savedindex"]
    df2 = df2[~df2.savedindex.isin(intersecting)].drop(["savedindex"], axis=1)
    return df2

def process_nonoverlapping_buildings_in_quadrant(quadrant):
    if not os.path.isdir("changesets"):
        os.makedirs("changesets")
    dataset_name = quadrant.envelope["dataset"]
    osm_name = "changesets/" + dataset_name + ".osm"
    osc_name = "changesets/" + dataset_name + ".osc"
    with open(osm_name, "w") as f:
        f.write(to_osm(quadrant.elements))
    changeset = ox.geometries_from_xml(osm_name)
    with open(osm_name, "w") as f:
        f.write(to_osm(changeset))
    bbox = changeset.total_bounds
    osmset = ox.geometries.geometries_from_bbox(bbox[3], bbox[1], bbox[2], bbox[0],
                                                tags={"building": True, "man_made": True, "amenity": True})
    osc = nonoverlapping_buildings(changeset, osmset)
    with open(osc_name, "w") as f:
        f.write(to_changeset(osc))
    return osc, dataset_name

gml_list = list_gmls_in_zip("LoD1.zip")
print(f"*** Found {len(gml_list)} GMLs in LoD1 dataset.")

stats_csv_fname = "OSM LoD1 notebook import stats.csv"
try:
    stats = pandas.read_csv(stats_csv_fname, index_col=0)
except:
    stats = pandas.DataFrame(index=["Total"])

gml_list = [
    "LoD1/LoD1_326705780.zip", "LoD1/LoD1_326725780.zip", "LoD1/LoD1_326745780.zip",
    "LoD1/LoD1_326765780.zip", "LoD1/LoD1_326785780.zip",

    "LoD1/LoD1_326705778.zip", "LoD1/LoD1_326725778.zip", "LoD1/LoD1_326745778.zip",
    "LoD1/LoD1_326765778.zip", "LoD1/LoD1_326785778.zip",

    "LoD1/LoD1_326705776.zip",                            "LoD1/LoD1_326745776.zip",
    "LoD1/LoD1_326765776.zip", "LoD1/LoD1_326785776.zip",

    "LoD1/LoD1_326705774.zip", "LoD1/LoD1_326725774.zip", "LoD1/LoD1_326745774.zip",
    "LoD1/LoD1_326765774.zip", "LoD1/LoD1_326785774.zip",

    "LoD1/LoD1_326705772.zip",                            "LoD1/LoD1_326745772.zip",
                               "LoD1/LoD1_326785772.zip",
]    

for gml in gmls_in_zip("LoD1.zip", gml_list):
    try:
        quadrant = parse_LoD1(gml)
        stats.loc[quadrant.envelope["dataset"], "buildings"] = len(quadrant.elements)
        osc, dataset_name = process_nonoverlapping_buildings_in_quadrant(quadrant)
        stats.loc[quadrant.envelope["dataset"], "not_in_osm"] = len(osc)
        if "addr:housenumber" in osc.columns:
            osc_a = osc.dropna(subset=["addr:housenumber"])
            stats.loc[quadrant.envelope["dataset"], "not_in_osm_with_addr"] = len(osc_a)
            with open("changesets/" + dataset_name + ".nonoverlapping_with_housenumber.osm", "w") as f:
                f.write(to_osm(osc_a))
            del osc_a
        else:
            stats.loc[quadrant.envelope["dataset"], "not_in_osm_with_addr"] = 0
        del osc
        del quadrant
    except Exception as e:
        print(f"*** EXCEPTION :\n{e}")
    finally:
        stats.loc["Total"] = stats.drop("Total").sum()
        stats.to_csv(stats_csv_fname)

stats
*** Found 4485 GMLs in LoD1 dataset.
*** [22/22] Parse LoD1.zip:LoD1/LoD1_326785772.zip:LoD1_326785772.gml   
Out[6]:
buildings not_in_osm not_in_osm_with_addr
Total 49322.0 17568.0 390.0
LoD1_670_5780_2_ST 607.0 165.0 0.0
LoD1_672_5780_2_ST 31.0 6.0 0.0
LoD1_674_5780_2_ST 2067.0 518.0 6.0
LoD1_676_5780_2_ST 3785.0 1561.0 93.0
LoD1_678_5780_2_ST 5227.0 2943.0 60.0
LoD1_670_5778_2_ST 2959.0 273.0 1.0
LoD1_672_5778_2_ST 155.0 1.0 0.0
LoD1_674_5778_2_ST 1285.0 387.0 11.0
LoD1_676_5778_2_ST 5344.0 2158.0 10.0
LoD1_678_5778_2_ST 3869.0 983.0 6.0
LoD1_670_5776_2_ST 113.0 1.0 0.0
LoD1_674_5776_2_ST 558.0 248.0 11.0
LoD1_676_5776_2_ST 3723.0 1930.0 67.0
LoD1_678_5776_2_ST 3344.0 946.0 11.0
LoD1_670_5774_2_ST 1352.0 30.0 2.0
LoD1_672_5774_2_ST 266.0 9.0 0.0
LoD1_674_5774_2_ST 1958.0 641.0 10.0
LoD1_676_5774_2_ST 5131.0 1876.0 53.0
LoD1_678_5774_2_ST 4498.0 1767.0 38.0
LoD1_670_5772_2_ST 278.0 6.0 0.0
LoD1_674_5772_2_ST 804.0 637.0 5.0
LoD1_678_5772_2_ST 1968.0 482.0 6.0
In [7]:
import folium, folium.plugins

#changeset_fname = "changesets/" + np.random.choice(stats.index[1:]) + ".nonoverlapping_with_housenumber.osm"
changeset_fname = "changesets/LoD1_670_5772_2_ST.osm"
print(changeset_fname)

changeset = ox.geometries_from_xml(changeset_fname)
bbox = changeset.total_bounds
osmset = ox.geometries.geometries_from_bbox(bbox[1], bbox[3], bbox[0], bbox[2],
                                            tags={"building": True, "man_made": True, "amenity": True})
if "nodes" in osmset: del osmset["nodes"]
if "ways" in osmset: del osmset["ways"]
if "nodes" in changeset: del changeset["nodes"]
if "ways" in changeset: del changeset["ways"]
if "source:geometry" in changeset: del changeset["source:geometry"]

map_geo = folium.plugins.DualMap(location=[(bbox[1] + bbox[3]) / 2, (bbox[0] + bbox[2]) / 2],
                                 max_zoom=50, tiles=None) #"OpenStreetMap")

layer = "lsa_lvermgeo_dop20_2"
folium.raster_layers.WmsTileLayer(url = 'https://www.geodatenportal.sachsen-anhalt.de/wss/service/ST_LVermGeo_DOP_WMS_OpenData/guest?',
                                  fmt="image/jpeg",
                                  max_zoom=50, 
                                  layers = layer,
                                  name = layer,
                                  transparent = False, 
                                  control = True,
                                  overlay = False,
                                  show = True,
                                  opacity=1.0
                                  ).add_to(map_geo)

osmset.explore(m=map_geo, style_kwds={"fillOpacity": 0.1, "color": "red", "weight": 1}, name="osm")
changeset.explore(m=map_geo, style_kwds={"fillOpacity": 0.1, "color": "blue", "weight": 1}, name="changeset")

folium.LayerControl().add_to(map_geo)
folium.plugins.MeasureControl().add_to(map_geo)
folium.plugins.MiniMap(tile_layer='cartodb positron', position='bottomleft').add_to(map_geo)
folium.plugins.MousePosition().add_to(map_geo)

map_geo
changesets/LoD1_670_5772_2_ST.osm
Out[7]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [8]:
# e = "No data"
# if len(osc) > 0:
#     e = osc.explore(max_zoom=50)
# e
# folium.raster_layers.WmsTileLayer(url = 'https://www.geodatenportal.sachsen-anhalt.de/wss/service/ST_LVermGeo_DOP_WMS_OpenData/guest?',
#                                   fmt="image/jpeg",
#                                   max_zoom=50, 
#                                   layers = layer,
#                                   name = layer,
#                                   transparent = False, 
#                                   control = True,
#                                   overlay = False,
#                                   show = True,
#                                   opacity=1.0
#                                   ).add_to(e)
# folium.LayerControl().add_to(e)
# folium.plugins.MeasureControl().add_to(e)
# folium.plugins.MousePosition().add_to(e)
# print(f"*** {len(osc)} buildings in changeset and not in OSM:")
# e
In [9]:
# try:
#     import IPython.display
#     buildings_with_streetname = osc.dropna(subset=["addr:street"])
#     print(f"*** {len(buildings_with_streetname)} buildings with street names:")
#     IPython.display.display_html(buildings_with_streetname.explore())
# except Exception as e:
#     print("*** No buildings with street names in changeset and not in OSM.")
In [10]:
class OSM:

    class API:
        def __init__(self, production = False):
            if production:
                self.apiurl = "https://api.openstreetmap.org/api/"
            else:
                self.apiurl = "https://master.apis.dev.openstreetmap.org/api/"
            config = configparser.ConfigParser()
            config.read('.config')
            self.auth = requests.auth.HTTPBasicAuth(config["OSM"]["user"], config["OSM"]["password"])

        def request (self, method, f, data=None):
            #print(f"*** HTTP REQUEST : {self.apiurl + f}")
            headers = { "User-Agent": "OSM LoD1 notebook", "Content-Type": "application/xml; charset=utf-8;" }
            r = requests.request(method=method, url=self.apiurl + f, data=data, headers=headers, auth=self.auth)
            #print(f"*** HTTP RESPONSE HEADERS : {r.headers}")
            r.raise_for_status()
            if "Error" in r.headers:
                raise Exception(r.headers["Error"]) 
            return r.content.decode("utf-8")

        def get (self, f):
            return self.request(method="GET", f=f)

        def put (self, f, data = None):
            print(data)
            return self.request(method="PUT", f=f, data=data)

        def post (self, f, data = None):
            return self.request(method="POST", f=f, data=data)

    def __init__ (self, production =False):
        self.api = self.API(production=production)

    def get_versions (self):
        return self.api.get("versions")

    def get_capabilities (self):
        return self.api.get("capabilities")

    def get_map (self, bbox):
        return self.api.get(f"0.6/map?bbox={bbox[0]},{bbox[1]},{bbox[2]},{bbox[3]}")

    def get_permissions (self):
        return self.api.get("0.6/permissions")

    def upload_changeset (self, changeset_xml, **kwargs):
        changeset_metadata_xml = '<osm>'
        changeset_metadata_xml += '\t<changeset>'
        for k, v in kwargs.items():
            changeset_metadata_xml += '\t\t<tag k="' + k + '" v="' + v + '"/>'
        changeset_metadata_xml += '\t</changeset>'
        changeset_metadata_xml += '</osm>'
        changeset_id = self.api.put("0.6/changeset/create", data=changeset_metadata_xml.encode("utf-8"))
        changeset_xml = changeset_xml.replace('changeset=""', 'changeset="' + changeset_id + '"')
        self.api.post(f"0.6/changeset/{changeset_id}/upload", data=changeset_xml.encode("utf-8"))
        return self.api.put(f"0.6/changeset/{changeset_id}/close")
In [11]:
osm = OSM(production=False)
#print(osm.get_versions())
# #print(osm.get_capabilities())
#print(osm.get_permissions())

description = "Buildings from GeoBasis-DE / LVermGeo LSA LoD1."

comment = "LoD1 geometry processed to derive footprint and elevation. Grant of permission: "\
    + "https://wiki.openstreetmap.org/wiki/DE:Permissions/Geobasisdaten_Sachsen-Anhalt. "\
    + "Code for transformation: https://codeberg.org/j0j/OSM "

assert len(comment) <= 255, "Tags cannot be larger than 255 characters."
assert len(description) <= 255, "Tags cannot be larger than 255 characters."

# osm.upload_changeset(open("changesets/LoD1_608_5758_2_ST.osc", "rb").read().decode("utf-8"),
#                      description=description,
#                      comment=comment,
#                      created_by="OSM LoD1 notebook",
#                      imagery_used="lsa_lvermgeo_dop20_2",
#                      source="GeoBasis-DE / LVermGeo LSA, dl-de/by-2-0, LoD1",
#                      #attribution="GeoBasis-DE / LVermGeo LSA, dl-de/by-2-0, LoD1",
#                      #bot="yes",
#                      review_requested="yes")
In [ ]:
 
In [ ]: