#! /usr/bin/env python3
# -*- coding: utf-8 -*-
"""
:samp:`Executor creating resourcelists`
"""
import os
from resync import Resource
from resync import ResourceList
from rspub.core.executors import Executor, SitemapData
from rspub.core.rs_enum import Capability
from rspub.util import defaults
class ResourceListExecutor(Executor):
    """
    :samp:`Executes the new resourcelist strategy`

    A ResourceListExecutor clears the metadata directory and creates new resourcelist(s) every time
    the executor runs (and is_saving_sitemaps).
    """

    def generate_rs_documents(self, filenames: iter) -> [SitemapData]:
        """
        :samp:`Generate the resourcelist(s) for the given filenames`

        Drains the generator function obtained from :func:`resourcelist_generator` and keeps
        only the sitemap data half of every yielded ``(sitemap_data, resourcelist)`` pair.

        :param filenames: iterable handed on unchanged to :func:`resourcelist_generator`
        :return: list with one sitemap data item per resourcelist that was produced
        """
        generator = self.resourcelist_generator(filenames)
        return [sitemap_data for sitemap_data, _ in generator()]

    def create_index(self, sitemap_data_iter: iter):
        """
        :samp:`Create a resourcelist-index when more than one resourcelist was produced`

        Nothing happens for zero or one sitemap data items. Otherwise a ``ResourceList`` flagged
        as sitemapindex is built, stamped with the start and end dates of the processing run,
        linked 'up' to the capabilitylist url, filled with one ``Resource`` per sitemap data item
        and handed to ``finish_sitemap``.

        :param sitemap_data_iter: iterable of sitemap data items as returned by
            :func:`generate_rs_documents`
        """
        # The parameter is annotated as a plain iterable, yet it is both measured and iterated.
        # Materializing it once keeps len() working when a caller passes a generator.
        sitemap_data_list = list(sitemap_data_iter)
        if len(sitemap_data_list) <= 1:
            return

        resourcelist_index = ResourceList()
        resourcelist_index.sitemapindex = True
        resourcelist_index.md_at = self.date_start_processing
        resourcelist_index.md_completed = self.date_end_processing

        # The public url of the index is the url prefix plus the path of the index file
        # relative to the resource directory.
        index_path = self.para.abs_metadata_path("resourcelist-index.xml")
        rel_index_path = os.path.relpath(index_path, self.para.resource_dir)
        index_url = self.para.url_prefix + defaults.sanitize_url_path(rel_index_path)
        resourcelist_index.link_set(rel="up", href=self.para.capabilitylist_url())

        for sitemap_data in sitemap_data_list:
            resourcelist_index.add(Resource(uri=sitemap_data.uri, md_at=sitemap_data.doc_start,
                                            md_completed=sitemap_data.doc_end))
            # Only documents that were actually saved are passed to update_rel_index.
            # NOTE(review): presumably this records index_url inside the saved document - confirm
            # against Executor.update_rel_index.
            if sitemap_data.document_saved:
                self.update_rel_index(index_url, sitemap_data.path, ResourceList())

        # NOTE(review): ordinal -1 presumably tells finish_sitemap this is the index - confirm.
        self.finish_sitemap(-1, resourcelist_index)

    def resourcelist_generator(self, filenames: iter) -> iter:
        """
        :samp:`Create a generator function that yields resourcelists for the given filenames`

        The returned function takes no arguments. When iterated it collects the resources coming
        from ``self.resource_generator()`` in a ``ResourceList`` and yields a
        ``(sitemap_data, resourcelist)`` pair each time the running resource count is a multiple
        of ``self.para.max_items_in_list``, plus one last pair for any remaining resources.

        :param filenames: iterable handed on unchanged to the resource generator
        :return: generator function yielding ``(sitemap_data, resourcelist)`` pairs
        """

        def generator() -> [SitemapData, ResourceList]:
            resourcelist = None
            ordinal = self.find_ordinal(Capability.resourcelist.name)
            doc_start = None
            resource_generator = self.resource_generator()
            for resource_count, resource in resource_generator(filenames):
                # Start a new list lazily, so no empty resourcelist is ever created.
                if resourcelist is None:
                    resourcelist = ResourceList()
                    doc_start = defaults.w3c_now()
                    resourcelist.md_at = doc_start
                resourcelist.add(resource)

                # The current list is full: finish it and start over with the next resource.
                # NOTE(review): assumes resource_count is a 1-based running count - confirm
                # against Executor.resource_generator.
                if resource_count % self.para.max_items_in_list == 0:
                    ordinal += 1
                    yield self._complete_resourcelist(ordinal, resourcelist, doc_start), resourcelist
                    resourcelist = None

            # Flush the partially filled last list. The explicit None check avoids relying on
            # the truthiness of the container.
            if resourcelist is not None:
                ordinal += 1
                yield self._complete_resourcelist(ordinal, resourcelist, doc_start), resourcelist

        return generator

    def _complete_resourcelist(self, ordinal: int, resourcelist: ResourceList, doc_start) -> SitemapData:
        """
        :samp:`Stamp the completion time on a resourcelist and finish its sitemap`

        :param ordinal: ordinal passed on to ``finish_sitemap``
        :param resourcelist: the resourcelist to complete; its ``md_completed`` is set here
        :param doc_start: value of ``defaults.w3c_now()`` taken when the resourcelist was started
        :return: whatever ``finish_sitemap`` returns for this resourcelist
        """
        doc_end = defaults.w3c_now()
        resourcelist.md_completed = doc_end
        return self.finish_sitemap(ordinal, resourcelist, doc_start=doc_start, doc_end=doc_end)