We have this case where we have to use data from an external web service. It delivers 16’000 entries and this is a very slow thing (if you want all data at once). So we have to store some data in our Plone site so we can use it afterward in our vocabularies.
@davisagli suggested using a BTree so I started with development and now I have a solution… But I’m unsure if I introduced a memory leak. I don’t know if overriding the whole stored BTree (with all entries; Line 132 in _set_oes) is a problem. What happens with the BTree which was stored before?
My approach is a new Toolset (copied from Core Tools because I didn’t find any documentation on this topic). The data will be introduced with a view (called by a cronjob) that calls the refresh_oes-Method.
So I hope someone with more experience with the usage of BTrees / Toolsets (see below) could have a look at it and give feedback.
# -*- coding: utf-8 -*-
from fhnw.core.interfaces import IOEDataStoreTool
from AccessControl.class_init import InitializeClass
from AccessControl.SecurityInfo import ClassSecurityInfo
from App.Common import package_home
from App.special_dtml import DTMLFile
from datetime import datetime
from BTrees.check import check
from BTrees.OOBTree import OOBTree
from os import path as os_path
from plone import api
from Products.CMFCore.permissions import ManagePortal
from Products.CMFCore.utils import UniqueObject
from Products.CMFCore.utils import registerToolInterface
from OFS.PropertyManager import PropertyManager
from OFS.SimpleItem import SimpleItem
from typing import Any
from typing import Dict
from typing import List
from typing import Tuple
from zope.interface import implementer
import logging
logger = logging.getLogger("fhnw.core")
_dtmldir = os_path.join(package_home(globals()), "dtml")
@implementer(IOEDataStoreTool)
class OEDataStoreTool(UniqueObject, SimpleItem, PropertyManager):
id: str = "fhnw_datastore_oe"
meta_type = "FHNWCore DataStoreOE Tool"
zmi_icon: str = "fas fa-database"
security: ClassSecurityInfo = ClassSecurityInfo()
_properties: Tuple[Dict] = (
{"id": "last_update", "type": "string", "mode": ""},
{"id": "running", "type": "boolean", "mode": "w"},
)
last_update: str = None
running: bool = False
manage_options: Tuple[Dict] = (
({"label": "Overview", "action": "manage_overview"},)
+ PropertyManager.manage_options
+ SimpleItem.manage_options
)
security.declareProtected(ManagePortal, "manage_overview")
manage_overview = DTMLFile("explainOEDataStoreTool", _dtmldir)
def __init__(self) -> None:
self._oe_data: OOBTree = OOBTree()
@security.private
def get_oes(self) -> List[Dict]:
"""Returns stored OE data.
Returns:
list: OEs
"""
return list(self._oe_data.values())
@security.private
def refresh_oes(self) -> bool:
"""Refreshes the OE data.
If refresh is already running the new call will be aborted.
Returns:
bool: Determines if the update was successful
"""
if self.running:
logger.info("Refreshing of OE data is already running.")
return False
self.running = True
data: List[Dict] = self._fetch_oes()
if data:
try:
self._set_oes(data)
self.running = False
return True
except (ValueError, AssertionError):
logger.warning("Could not set OE")
self.running = False
return False
@security.private
def _set_oes(self, oe_data: List[Dict]) -> None:
"""Sets the whole oe data to the given value
Args:
oe_data (List[Dict]): all OE entries within a list.
Raises:
ValueError: Given OE data is not of type dict.
AssertionError: If anything is wrong with the new value of oe_data
"""
if not isinstance(oe_data, list):
raise ValueError("Given oe_data is not of type dict.")
# we explicitly use here a mapping (like dict) because in list-like data structures
# the values should be imutable. The values are a dict and not immutable so we store the dict
# as value within a BTree with "organisationId" as key.
new_oe_data: OOBTree = OOBTree()
for oe_entry in oe_data:
if not isinstance(oe_entry, dict):
logger.warning(
f'Entry is of type "{type(oe_entry)}" and is not added to stored OE data. Entry data: {oe_entry}'
)
continue
if (
"organisationId" not in oe_entry
or "name" not in oe_entry
or "nameEn" not in oe_entry
):
logger.warning(
f"Entry is not well formated and is not added to stored OE data. Entry data: {oe_entry} "
)
continue
new_oe_data[oe_entry["organisationId"]] = oe_entry
check(
new_oe_data
) # check if OOBTree is valid and healthy before we override the old oe data.
self._oe_data = new_oe_data
logger.info("Successfully updated OE data.")
self.last_update = datetime.now().isoformat()
@security.private
def _fetch_oes(self) -> List[Dict]:
data: List = []
error: str = None
portal: Any = api.portal.get()
# we need to use unrestrictedTraverse here because we use this vocabulary in anonymous
# scenarios (Ajax calls, etc.) and don't want to expose our WSUrl objects
# don't use plone.api methods here! They rely on restricted access.
wsurl: Any = portal.unrestrictedTraverse("oe-webservice", default=None)
if wsurl is not None:
try:
data = wsurl.get_data()
except ConnectionError as e:
error = f"Connecting failed. Msg: {e}"
except ValueError as e:
error = f"Parsing json failed. Msg: {e}"
else:
error = "Webservice not found."
if not isinstance(data, list):
error = f"Data used for OEs is not in a desired format. Data: {data}"
if error:
logger.warning(
f"An error has occured during fetching of OEs. "
f"Webservice: '{self.content_id}' - Output: {error}"
)
return data
InitializeClass(OEDataStoreTool)
registerToolInterface("fhnw_datastore_oe", IOEDataStoreTool)