rtutil: Add RTLinkCache class to cache links to disk.
This will greatly reduce RT requests across multiple runs and speed up link checking/conversion.
This commit is contained in:
parent
f227593655
commit
a8407c7b6a
4 changed files with 236 additions and 6 deletions
|
@ -126,8 +126,16 @@ class Config:
|
||||||
wrapper_client = self.rt_client(credentials, client)
|
wrapper_client = self.rt_client(credentials, client)
|
||||||
if wrapper_client is None:
|
if wrapper_client is None:
|
||||||
return None
|
return None
|
||||||
|
cache_dir_path = self.cache_dir_path()
|
||||||
|
if cache_dir_path is None:
|
||||||
|
cache_db = None
|
||||||
else:
|
else:
|
||||||
return rtutil.RT(wrapper_client)
|
cache_name = '{}@{}.sqlite3'.format(
|
||||||
|
credentials.user,
|
||||||
|
urlparse.quote(str(credentials.server), ''),
|
||||||
|
)
|
||||||
|
cache_db = rtutil.RTLinkCache.setup(cache_dir_path / cache_name)
|
||||||
|
return rtutil.RT(wrapper_client, cache_db)
|
||||||
|
|
||||||
def rt_wrapper(self,
|
def rt_wrapper(self,
|
||||||
credentials: RTCredentials=None,
|
credentials: RTCredentials=None,
|
||||||
|
|
|
@ -17,17 +17,142 @@
|
||||||
import functools
|
import functools
|
||||||
import mimetypes
|
import mimetypes
|
||||||
import re
|
import re
|
||||||
|
import sqlite3
|
||||||
import urllib.parse as urlparse
|
import urllib.parse as urlparse
|
||||||
|
|
||||||
import rt
|
import rt
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
from typing import (
|
from typing import (
|
||||||
|
Callable,
|
||||||
|
Iterator,
|
||||||
|
MutableMapping,
|
||||||
Optional,
|
Optional,
|
||||||
|
Set,
|
||||||
Tuple,
|
Tuple,
|
||||||
Union,
|
Union,
|
||||||
)
|
)
|
||||||
|
|
||||||
RTId = Union[int, str]
|
RTId = Union[int, str]
|
||||||
|
TicketAttachmentIds = Tuple[str, Optional[str]]
|
||||||
|
_LinkCache = MutableMapping[TicketAttachmentIds, Optional[str]]
|
||||||
|
|
||||||
|
class RTLinkCache(_LinkCache):
|
||||||
|
"""Cache RT links to disk
|
||||||
|
|
||||||
|
This class provides a dict-like interface to a cache of RT links.
|
||||||
|
Once an object is in RT, a link to it should never change.
|
||||||
|
The only exception is when objects get shredded, and those objects
|
||||||
|
shouldn't be referenced in books anyway.
|
||||||
|
|
||||||
|
This implementation is backed by a sqlite database. You can call::
|
||||||
|
|
||||||
|
db = RTLinkCache.setup(path)
|
||||||
|
|
||||||
|
This method will try to open a sqlite database at the given path,
|
||||||
|
and set up necessary tables, etc.
|
||||||
|
If it succeeds, it returns a database connection you can use to
|
||||||
|
initialize the cache.
|
||||||
|
If it fails, it returns None, and the caller should use some other
|
||||||
|
dict-like object (like a normal dict) for caching.
|
||||||
|
You can give the result to the RT utility class either way,
|
||||||
|
and it will do the right thing for itself::
|
||||||
|
|
||||||
|
rt = RT(rt_client, db)
|
||||||
|
"""
|
||||||
|
|
||||||
|
CREATE_TABLE_SQL = """CREATE TABLE IF NOT EXISTS RTLinkCache(
|
||||||
|
ticket_id TEXT NOT NULL,
|
||||||
|
attachment_id TEXT,
|
||||||
|
url TEXT NOT NULL,
|
||||||
|
PRIMARY KEY (ticket_id, attachment_id)
|
||||||
|
)"""
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def setup(cls, cache_path: Path) -> Optional[sqlite3.Connection]:
|
||||||
|
try:
|
||||||
|
db = sqlite3.connect(cache_path, isolation_level=None)
|
||||||
|
cursor = db.cursor()
|
||||||
|
cursor.execute(cls.CREATE_TABLE_SQL)
|
||||||
|
cursor.execute('SELECT url FROM RTLinkCache LIMIT 1')
|
||||||
|
have_data = cursor.fetchone() is not None
|
||||||
|
except sqlite3.OperationalError:
|
||||||
|
# If we couldn't get this far, sqlite provides no benefit.
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
# There shouldn't be any records where url is NULL, so running this
|
||||||
|
# DELETE pulls double duty for us: it tells us whether or not we
|
||||||
|
# can write to the database and it enforces database integrity.
|
||||||
|
cursor.execute('DELETE FROM RTLinkCache WHERE url IS NULL')
|
||||||
|
except sqlite3.OperationalError:
|
||||||
|
can_write = False
|
||||||
|
else:
|
||||||
|
can_write = True
|
||||||
|
print(cache_path, have_data, can_write)
|
||||||
|
if not (can_write or have_data):
|
||||||
|
# If there's nothing to read and no way to write, sqlite provides
|
||||||
|
# no benefit.
|
||||||
|
return None
|
||||||
|
elif not can_write:
|
||||||
|
# Set up an in-memory database that we can write to, seeded with
|
||||||
|
# the data available to read.
|
||||||
|
try:
|
||||||
|
cursor.close()
|
||||||
|
db.close()
|
||||||
|
db = sqlite3.connect(':memory:', isolation_level=None)
|
||||||
|
cursor = db.cursor()
|
||||||
|
cursor.execute('ATTACH DATABASE ? AS readsource',
|
||||||
|
('{}?mode=ro'.format(cache_path.as_uri()),))
|
||||||
|
cursor.execute(cls.CREATE_TABLE_SQL)
|
||||||
|
cursor.execute('INSERT INTO RTLinkCache SELECT * FROM readsource.RTLinkCache')
|
||||||
|
cursor.execute('DETACH DATABASE readsource')
|
||||||
|
except sqlite3.OperationalError as error:
|
||||||
|
# We're back to the case of having nothing to read and no way
|
||||||
|
# to write.
|
||||||
|
return None
|
||||||
|
cursor.close()
|
||||||
|
db.commit()
|
||||||
|
return db
|
||||||
|
|
||||||
|
def __init__(self, cache_db: sqlite3.Connection) -> None:
|
||||||
|
self._db = cache_db
|
||||||
|
self._nourls: Set[TicketAttachmentIds] = set()
|
||||||
|
|
||||||
|
def __iter__(self) -> Iterator[TicketAttachmentIds]:
|
||||||
|
yield from self._db.execute('SELECT ticket_id, attachment_id FROM RTLinkCache')
|
||||||
|
yield from self._nourls
|
||||||
|
|
||||||
|
def __len__(self) -> int:
|
||||||
|
cursor = self._db.execute('SELECT COUNT(*) FROM RTLinkCache')
|
||||||
|
return cursor.fetchone()[0] + len(self._nourls)
|
||||||
|
|
||||||
|
def __getitem__(self, key: TicketAttachmentIds) -> Optional[str]:
|
||||||
|
if key in self._nourls:
|
||||||
|
return None
|
||||||
|
cursor = self._db.execute(
|
||||||
|
'SELECT url FROM RTLinkCache WHERE ticket_id = ? AND attachment_id IS ?',
|
||||||
|
key,
|
||||||
|
)
|
||||||
|
retval = cursor.fetchone()
|
||||||
|
if retval is None:
|
||||||
|
raise KeyError(key)
|
||||||
|
else:
|
||||||
|
return retval[0]
|
||||||
|
|
||||||
|
def __setitem__(self, key: TicketAttachmentIds, value: Optional[str]) -> None:
|
||||||
|
if value is None:
|
||||||
|
self._nourls.add(key)
|
||||||
|
else:
|
||||||
|
ticket_id, attachment_id = key
|
||||||
|
self._db.execute(
|
||||||
|
'INSERT INTO RTLinkCache VALUES(?, ?, ?)',
|
||||||
|
(ticket_id, attachment_id, value),
|
||||||
|
)
|
||||||
|
|
||||||
|
def __delitem__(self, key: TicketAttachmentIds) -> None:
|
||||||
|
raise NotImplementedError("RTLinkCache.__delitem__")
|
||||||
|
|
||||||
|
|
||||||
class RT:
|
class RT:
|
||||||
"""RT utility wrapper class
|
"""RT utility wrapper class
|
||||||
|
@ -38,7 +163,9 @@ class RT:
|
||||||
* Parse links
|
* Parse links
|
||||||
* Verify that they refer to extant objects in RT
|
* Verify that they refer to extant objects in RT
|
||||||
* Convert metadata links to RT web links
|
* Convert metadata links to RT web links
|
||||||
* Cache results, to reduce network requests
|
* Cache results, to reduce network requests.
|
||||||
|
You can set up an RTLinkCache to cache links to disks over multiple runs.
|
||||||
|
Refer to RTLinkCache's docstring for details and instructions.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
PARSE_REGEXPS = [
|
PARSE_REGEXPS = [
|
||||||
|
@ -46,8 +173,7 @@ class RT:
|
||||||
re.compile(r'^rt://ticket/([0-9]+)(?:/attachments?/([0-9]+))?/?$'),
|
re.compile(r'^rt://ticket/([0-9]+)(?:/attachments?/([0-9]+))?/?$'),
|
||||||
]
|
]
|
||||||
|
|
||||||
def __init__(self, rt_client: rt.Rt) -> None:
|
def __init__(self, rt_client: rt.Rt, cache_db: Optional[sqlite3.Connection]=None) -> None:
|
||||||
self.rt = rt_client
|
|
||||||
urlparts = urlparse.urlparse(rt_client.url)
|
urlparts = urlparse.urlparse(rt_client.url)
|
||||||
try:
|
try:
|
||||||
index = urlparts.path.rindex('/REST/')
|
index = urlparts.path.rindex('/REST/')
|
||||||
|
@ -56,6 +182,32 @@ class RT:
|
||||||
else:
|
else:
|
||||||
base_path = urlparts.path[:index + 1]
|
base_path = urlparts.path[:index + 1]
|
||||||
self.url_base = urlparts._replace(path=base_path)
|
self.url_base = urlparts._replace(path=base_path)
|
||||||
|
self.rt = rt_client
|
||||||
|
self._cache: _LinkCache
|
||||||
|
if cache_db is None:
|
||||||
|
self._cache = {}
|
||||||
|
else:
|
||||||
|
self._cache = RTLinkCache(cache_db)
|
||||||
|
|
||||||
|
# mypy complains that the first argument isn't self, but this isn't meant
|
||||||
|
# to be a method, it's just an internal decrator.
|
||||||
|
def _cache_method(func: Callable) -> Callable: # type:ignore[misc]
|
||||||
|
@functools.wraps(func)
|
||||||
|
def caching_wrapper(self,
|
||||||
|
ticket_id: RTId,
|
||||||
|
attachment_id: Optional[RTId]=None,
|
||||||
|
) -> str:
|
||||||
|
cache_key = (str(ticket_id), attachment_id and str(attachment_id))
|
||||||
|
try:
|
||||||
|
url = self._cache[cache_key]
|
||||||
|
except KeyError:
|
||||||
|
if attachment_id is None:
|
||||||
|
url = func(self, ticket_id)
|
||||||
|
else:
|
||||||
|
url = func(self, ticket_id, attachment_id)
|
||||||
|
self._cache[cache_key] = url
|
||||||
|
return url
|
||||||
|
return caching_wrapper
|
||||||
|
|
||||||
def _extend_url(self,
|
def _extend_url(self,
|
||||||
path_tail: str,
|
path_tail: str,
|
||||||
|
@ -84,7 +236,7 @@ class RT:
|
||||||
fragment = 'txn-{}'.format(txn_id)
|
fragment = 'txn-{}'.format(txn_id)
|
||||||
return self._extend_url('Ticket/Display.html', fragment, id=str(ticket_id))
|
return self._extend_url('Ticket/Display.html', fragment, id=str(ticket_id))
|
||||||
|
|
||||||
@functools.lru_cache()
|
@_cache_method
|
||||||
def attachment_url(self, ticket_id: RTId, attachment_id: RTId) -> Optional[str]:
|
def attachment_url(self, ticket_id: RTId, attachment_id: RTId) -> Optional[str]:
|
||||||
attachment = self.rt.get_attachment(ticket_id, attachment_id)
|
attachment = self.rt.get_attachment(ticket_id, attachment_id)
|
||||||
if attachment is None:
|
if attachment is None:
|
||||||
|
@ -118,7 +270,7 @@ class RT:
|
||||||
return (ticket_id, attachment_id)
|
return (ticket_id, attachment_id)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@functools.lru_cache()
|
@_cache_method
|
||||||
def ticket_url(self, ticket_id: RTId) -> Optional[str]:
|
def ticket_url(self, ticket_id: RTId) -> Optional[str]:
|
||||||
if self.rt.get_ticket(ticket_id) is None:
|
if self.rt.get_ticket(ticket_id) is None:
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -207,6 +207,22 @@ def test_rt_wrapper_cache_responds_to_external_credential_changes(rt_environ):
|
||||||
rt2 = config.rt_wrapper(None, testutil.RTClient)
|
rt2 = config.rt_wrapper(None, testutil.RTClient)
|
||||||
assert rt1 is not rt2
|
assert rt1 is not rt2
|
||||||
|
|
||||||
|
def test_rt_wrapper_has_cache(tmp_path):
|
||||||
|
with update_environ(XDG_CACHE_DIR=tmp_path):
|
||||||
|
config = config_mod.Config()
|
||||||
|
rt = config.rt_wrapper(None, testutil.RTClient)
|
||||||
|
rt.exists(1)
|
||||||
|
expected = 'conservancy_beancount/{}@*.sqlite3'.format(RT_FILE_CREDS[1])
|
||||||
|
assert any(tmp_path.glob(expected))
|
||||||
|
|
||||||
|
def test_rt_wrapper_without_cache(tmp_path):
|
||||||
|
tmp_path.chmod(0)
|
||||||
|
with update_environ(XDG_CACHE_DIR=tmp_path):
|
||||||
|
config = config_mod.Config()
|
||||||
|
rt = config.rt_wrapper(None, testutil.RTClient)
|
||||||
|
tmp_path.chmod(0o600)
|
||||||
|
assert not any(tmp_path.iterdir())
|
||||||
|
|
||||||
def test_cache_mkdir(tmp_path):
|
def test_cache_mkdir(tmp_path):
|
||||||
expected = tmp_path / 'TESTcache'
|
expected = tmp_path / 'TESTcache'
|
||||||
with update_environ(XDG_CACHE_DIR=tmp_path):
|
with update_environ(XDG_CACHE_DIR=tmp_path):
|
||||||
|
|
|
@ -14,6 +14,8 @@
|
||||||
# You should have received a copy of the GNU Affero General Public License
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
import contextlib
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from . import testutil
|
from . import testutil
|
||||||
|
@ -46,6 +48,14 @@ def new_client():
|
||||||
TICKET_DATA = testutil.RTClient.TICKET_DATA.copy()
|
TICKET_DATA = testutil.RTClient.TICKET_DATA.copy()
|
||||||
return RTClient()
|
return RTClient()
|
||||||
|
|
||||||
|
def new_cache(database=':memory:'):
|
||||||
|
db = rtutil.RTLinkCache.setup(database)
|
||||||
|
if db is None:
|
||||||
|
print("NOTE: did not set up database cache at {}".format(database))
|
||||||
|
return contextlib.nullcontext(db)
|
||||||
|
else:
|
||||||
|
return contextlib.closing(db)
|
||||||
|
|
||||||
@pytest.mark.parametrize('ticket_id,attachment_id,expected', EXPECTED_URLS)
|
@pytest.mark.parametrize('ticket_id,attachment_id,expected', EXPECTED_URLS)
|
||||||
def test_url(rt, ticket_id, attachment_id, expected):
|
def test_url(rt, ticket_id, attachment_id, expected):
|
||||||
if expected is not None:
|
if expected is not None:
|
||||||
|
@ -125,3 +135,47 @@ def test_uncommon_server_url_parsing():
|
||||||
client = testutil.RTClient(url + 'REST/1.0/')
|
client = testutil.RTClient(url + 'REST/1.0/')
|
||||||
rt = rtutil.RT(client)
|
rt = rtutil.RT(client)
|
||||||
assert rt.url(1).startswith(url)
|
assert rt.url(1).startswith(url)
|
||||||
|
|
||||||
|
def test_shared_cache(new_client):
|
||||||
|
ticket_id, _, expected = EXPECTED_URLS[0]
|
||||||
|
expected = DEFAULT_RT_URL + expected
|
||||||
|
with new_cache() as cachedb:
|
||||||
|
rt1 = rtutil.RT(new_client, cachedb)
|
||||||
|
assert rt1.url(ticket_id) == expected
|
||||||
|
new_client.TICKET_DATA.clear()
|
||||||
|
rt2 = rtutil.RT(new_client, cachedb)
|
||||||
|
assert rt2.url(ticket_id) == expected
|
||||||
|
assert not rt2.exists(ticket_id + 1)
|
||||||
|
assert rt1 is not rt2
|
||||||
|
|
||||||
|
def test_no_shared_cache(new_client):
|
||||||
|
with new_cache() as cache1, new_cache() as cache2:
|
||||||
|
rt1 = rtutil.RT(new_client, cache1)
|
||||||
|
rt2 = rtutil.RT(new_client, cache2)
|
||||||
|
assert rt1.exists(1)
|
||||||
|
new_client.TICKET_DATA.clear()
|
||||||
|
assert not rt2.exists(1)
|
||||||
|
assert rt1.exists(1)
|
||||||
|
|
||||||
|
def test_read_only_cache(new_client, tmp_path):
|
||||||
|
db_path = tmp_path / 'test.db'
|
||||||
|
ticket_id, _, expected = EXPECTED_URLS[0]
|
||||||
|
expected = DEFAULT_RT_URL + expected
|
||||||
|
with new_cache(db_path) as cache1:
|
||||||
|
rt1 = rtutil.RT(new_client, cache1)
|
||||||
|
assert rt1.url(ticket_id) == expected
|
||||||
|
new_client.TICKET_DATA.clear()
|
||||||
|
db_path.chmod(0o400)
|
||||||
|
with new_cache(db_path) as cache2:
|
||||||
|
rt2 = rtutil.RT(new_client, cache2)
|
||||||
|
assert rt2.url(ticket_id) == expected
|
||||||
|
assert rt2.url(ticket_id + 1) is None
|
||||||
|
|
||||||
|
def test_results_not_found_only_in_transient_cache(new_client):
|
||||||
|
with new_cache() as cache:
|
||||||
|
rt1 = rtutil.RT(new_client, cache)
|
||||||
|
rt2 = rtutil.RT(new_client, cache)
|
||||||
|
assert not rt1.exists(9)
|
||||||
|
new_client.TICKET_DATA['9'] = [('99', '(Unnamed)', 'text/plain', '0b')]
|
||||||
|
assert not rt1.exists(9)
|
||||||
|
assert rt2.exists(9)
|
||||||
|
|
Loading…
Reference in a new issue