Update DM XML scraper script (#33452)
* scraper updates re-write this msg
* Restyled by isort
---------
Co-authored-by: Restyled.io <commits@restyled.io>
diff --git a/scripts/spec_xml/generate_spec_xml.py b/scripts/spec_xml/generate_spec_xml.py
index b947b99..ecdd47e 100755
--- a/scripts/spec_xml/generate_spec_xml.py
+++ b/scripts/spec_xml/generate_spec_xml.py
@@ -20,6 +20,8 @@
import re
import subprocess
import sys
+import xml.etree.ElementTree as ElementTree
+from pathlib import Path
import click
@@ -36,6 +38,20 @@
return os.path.abspath(os.path.join(output_dir, xml))
+def make_asciidoc(target: str, include_in_progress: bool, spec_dir: str, dry_run: bool) -> str:
+ cmd = ['make', 'PRINT_FILENAMES=1']
+ if include_in_progress:
+ cmd.append('INCLUDE_IN_PROGRESS=1')
+ cmd.append(target)
+ if dry_run:
+ print(cmd)
+ return ''
+ else:
+ ret = subprocess.check_output(cmd, cwd=spec_dir).decode('UTF-8').rstrip()
+ print(ret)
+ return ret
+
+
@click.command()
@click.option(
'--scraper',
@@ -56,16 +72,21 @@
default=False,
is_flag=True,
help='Flag for dry run')
-def main(scraper, spec_root, output_dir, dry_run):
+@click.option(
+ '--include-in-progress',
+ default=True,
+ type=bool,
+ help='Include in-progress items from spec')
+def main(scraper, spec_root, output_dir, dry_run, include_in_progress):
# Clusters need to be scraped first because the cluster directory is passed to the device type directory
- scrape_clusters(scraper, spec_root, output_dir, dry_run)
- scrape_device_types(scraper, spec_root, output_dir, dry_run)
+ scrape_clusters(scraper, spec_root, output_dir, dry_run, include_in_progress)
+ scrape_device_types(scraper, spec_root, output_dir, dry_run, include_in_progress)
if not dry_run:
dump_versions(scraper, spec_root, output_dir)
dump_cluster_ids(output_dir)
-def scrape_clusters(scraper, spec_root, output_dir, dry_run):
+def scrape_clusters(scraper, spec_root, output_dir, dry_run, include_in_progress):
src_dir = os.path.abspath(os.path.join(spec_root, 'src'))
sdm_clusters_dir = os.path.abspath(
os.path.join(src_dir, 'service_device_management'))
@@ -74,22 +95,25 @@
media_clusters_dir = os.path.abspath(
os.path.join(app_clusters_dir, 'media'))
clusters_output_dir = os.path.abspath(os.path.join(output_dir, 'clusters'))
- dm_clusters_list = ['ACL-Cluster.adoc', 'Binding-Cluster.adoc', 'bridge-clusters.adoc',
- 'Descriptor-Cluster.adoc', 'Group-Key-Management-Cluster.adoc', 'ICDManagement.adoc',
- 'Label-Cluster.adoc']
- sdm_exclude_list = ['AdminAssistedCommissioningFlows.adoc', 'BulkDataExchange.adoc', 'CommissioningFlows.adoc',
- 'DeviceCommissioningFlows.adoc', 'DistributedComplianceLedger.adoc', 'OTAFileFormat.adoc']
- app_exclude_list = ['appliances.adoc', 'closures.adoc', 'general.adoc',
- 'hvac.adoc', 'lighting.adoc', 'meas_and_sense.adoc', 'robots.adoc']
- media_exclude_list = ['media.adoc', 'VideoPlayerArchitecture.adoc']
if not os.path.exists(clusters_output_dir):
os.makedirs(clusters_output_dir)
+ print('Generating main spec to get file include list - this make take a few minutes')
+ main_out = make_asciidoc('pdf', include_in_progress, spec_root, dry_run)
+ print('Generating cluster spec to get file include list - this make take a few minutes')
+ cluster_out = make_asciidoc('pdf-appclusters-book', include_in_progress, spec_root, dry_run)
+
def scrape_cluster(filename: str) -> None:
+ base = Path(filename).stem
+ if base not in main_out and base not in cluster_out:
+ print(f'skipping file: {base} as it is not compiled into the asciidoc')
+ return
xml_path = get_xml_path(filename, clusters_output_dir)
cmd = [scraper, 'cluster', '-i', filename, '-o',
- xml_path, '-nd', '--define', 'in-progress']
+ xml_path, '-nd']
+ if include_in_progress:
+ cmd.extend(['--define', 'in-progress'])
if dry_run:
print(cmd)
else:
@@ -97,19 +121,38 @@
def scrape_all_clusters(dir: str, exclude_list: list[str] = []) -> None:
for filename in glob.glob(f'{dir}/*.adoc'):
- if os.path.basename(filename) in exclude_list:
- continue
scrape_cluster(filename)
- scrape_all_clusters(sdm_clusters_dir, sdm_exclude_list)
- scrape_all_clusters(app_clusters_dir, app_exclude_list)
- scrape_all_clusters(media_clusters_dir, media_exclude_list)
- for f in dm_clusters_list:
- filename = f'{dm_clusters_dir}/{f}'
- scrape_cluster(filename)
+ scrape_all_clusters(dm_clusters_dir)
+ scrape_all_clusters(sdm_clusters_dir)
+ scrape_all_clusters(app_clusters_dir)
+ scrape_all_clusters(media_clusters_dir)
+
+ for xml_path in glob.glob(f'{clusters_output_dir}/*.xml'):
+ tree = ElementTree.parse(f'{xml_path}')
+ root = tree.getroot()
+ cluster = next(root.iter('cluster'))
+ # If there's no cluster ID table, this isn't a cluster
+ try:
+ next(cluster.iter('clusterIds'))
+ except StopIteration:
+ # If there's no cluster ID table, this isn't a cluster just some kind of intro adoc
+ print(f'Removing file {xml_path} as it does not include any cluster definitions')
+ os.remove(xml_path)
+ continue
+ # For now, we're going to manually remove the word "Cluster" from the cluster name field
+ # to make the diff easier. The update to 1.2.4 of the scraper added this.
+ # TODO: submit a separate PR with JUST this change revered and remove this code.
+ with open(xml_path, 'rb') as input:
+ xml_str = input.read()
+
+ original_name = bytes(cluster.attrib['name'], 'utf-8')
+ replacement_name = bytes(cluster.attrib['name'].removesuffix(" Cluster"), 'utf-8')
+ with open(xml_path, 'wb') as output:
+ output.write(xml_str.replace(original_name, replacement_name))
-def scrape_device_types(scraper, spec_root, output_dir, dry_run):
+def scrape_device_types(scraper, spec_root, output_dir, dry_run, include_in_progress):
device_type_dir = os.path.abspath(
os.path.join(spec_root, 'src', 'device_types'))
device_types_output_dir = os.path.abspath(
@@ -119,9 +162,16 @@
if not os.path.exists(device_types_output_dir):
os.makedirs(device_types_output_dir)
+ print('Generating device type library to get file include list - this make take a few minutes')
+ device_type_output = make_asciidoc('pdf-devicelibrary-book', include_in_progress, spec_root, dry_run)
+
def scrape_device_type(filename: str) -> None:
+ base = Path(filename).stem
+ if base not in device_type_output:
+ print(f'skipping file: {filename} as it is not compiled into the asciidoc')
+ return
xml_path = get_xml_path(filename, device_types_output_dir)
- cmd = [scraper, 'devicetype', '-c', clusters_output_dir,
+ cmd = [scraper, 'devicetype', '-c', '-cls', clusters_output_dir,
'-nd', '-i', filename, '-o', xml_path]
if dry_run:
print(cmd)
@@ -187,7 +237,8 @@
json_file = os.path.join(clusters_output_dir, 'cluster_ids.json')
with open(json_file, "w") as outfile:
- json.dump(json_dict, outfile, indent=2)
+ json.dump(json_dict, outfile, indent=4)
+ outfile.write('\n')
if __name__ == '__main__':