rtutil: Add RTLinkCache class to cache links to disk.

This will greatly reduce RT requests across multiple runs
and speed up link checking/conversion.
This commit is contained in:
Brett Smith 2020-03-27 07:35:45 -04:00
parent f227593655
commit a8407c7b6a
4 changed files with 236 additions and 6 deletions

View file

@ -126,8 +126,16 @@ class Config:
wrapper_client = self.rt_client(credentials, client)
if wrapper_client is None:
return None
cache_dir_path = self.cache_dir_path()
if cache_dir_path is None:
cache_db = None
else:
return rtutil.RT(wrapper_client)
cache_name = '{}@{}.sqlite3'.format(
credentials.user,
urlparse.quote(str(credentials.server), ''),
)
cache_db = rtutil.RTLinkCache.setup(cache_dir_path / cache_name)
return rtutil.RT(wrapper_client, cache_db)
def rt_wrapper(self,
credentials: RTCredentials=None,

View file

@ -17,17 +17,142 @@
import functools
import mimetypes
import re
import sqlite3
import urllib.parse as urlparse
import rt
from pathlib import Path
from typing import (
Callable,
Iterator,
MutableMapping,
Optional,
Set,
Tuple,
Union,
)
RTId = Union[int, str]
TicketAttachmentIds = Tuple[str, Optional[str]]
_LinkCache = MutableMapping[TicketAttachmentIds, Optional[str]]
class RTLinkCache(_LinkCache):
"""Cache RT links to disk
This class provides a dict-like interface to a cache of RT links.
Once an object is in RT, a link to it should never change.
The only exception is when objects get shredded, and those objects
shouldn't be referenced in books anyway.
This implementation is backed by a sqlite database. You can call::
db = RTLinkCache.setup(path)
This method will try to open a sqlite database at the given path,
and set up necessary tables, etc.
If it succeeds, it returns a database connection you can use to
initialize the cache.
If it fails, it returns None, and the caller should use some other
dict-like object (like a normal dict) for caching.
You can give the result to the RT utility class either way,
and it will do the right thing for itself::
rt = RT(rt_client, db)
"""
CREATE_TABLE_SQL = """CREATE TABLE IF NOT EXISTS RTLinkCache(
ticket_id TEXT NOT NULL,
attachment_id TEXT,
url TEXT NOT NULL,
PRIMARY KEY (ticket_id, attachment_id)
)"""
@classmethod
def setup(cls, cache_path: Path) -> Optional[sqlite3.Connection]:
try:
db = sqlite3.connect(cache_path, isolation_level=None)
cursor = db.cursor()
cursor.execute(cls.CREATE_TABLE_SQL)
cursor.execute('SELECT url FROM RTLinkCache LIMIT 1')
have_data = cursor.fetchone() is not None
except sqlite3.OperationalError:
# If we couldn't get this far, sqlite provides no benefit.
return None
try:
# There shouldn't be any records where url is NULL, so running this
# DELETE pulls double duty for us: it tells us whether or not we
# can write to the database and it enforces database integrity.
cursor.execute('DELETE FROM RTLinkCache WHERE url IS NULL')
except sqlite3.OperationalError:
can_write = False
else:
can_write = True
print(cache_path, have_data, can_write)
if not (can_write or have_data):
# If there's nothing to read and no way to write, sqlite provides
# no benefit.
return None
elif not can_write:
# Set up an in-memory database that we can write to, seeded with
# the data available to read.
try:
cursor.close()
db.close()
db = sqlite3.connect(':memory:', isolation_level=None)
cursor = db.cursor()
cursor.execute('ATTACH DATABASE ? AS readsource',
('{}?mode=ro'.format(cache_path.as_uri()),))
cursor.execute(cls.CREATE_TABLE_SQL)
cursor.execute('INSERT INTO RTLinkCache SELECT * FROM readsource.RTLinkCache')
cursor.execute('DETACH DATABASE readsource')
except sqlite3.OperationalError as error:
# We're back to the case of having nothing to read and no way
# to write.
return None
cursor.close()
db.commit()
return db
def __init__(self, cache_db: sqlite3.Connection) -> None:
self._db = cache_db
self._nourls: Set[TicketAttachmentIds] = set()
def __iter__(self) -> Iterator[TicketAttachmentIds]:
yield from self._db.execute('SELECT ticket_id, attachment_id FROM RTLinkCache')
yield from self._nourls
def __len__(self) -> int:
cursor = self._db.execute('SELECT COUNT(*) FROM RTLinkCache')
return cursor.fetchone()[0] + len(self._nourls)
def __getitem__(self, key: TicketAttachmentIds) -> Optional[str]:
if key in self._nourls:
return None
cursor = self._db.execute(
'SELECT url FROM RTLinkCache WHERE ticket_id = ? AND attachment_id IS ?',
key,
)
retval = cursor.fetchone()
if retval is None:
raise KeyError(key)
else:
return retval[0]
def __setitem__(self, key: TicketAttachmentIds, value: Optional[str]) -> None:
if value is None:
self._nourls.add(key)
else:
ticket_id, attachment_id = key
self._db.execute(
'INSERT INTO RTLinkCache VALUES(?, ?, ?)',
(ticket_id, attachment_id, value),
)
def __delitem__(self, key: TicketAttachmentIds) -> None:
raise NotImplementedError("RTLinkCache.__delitem__")
class RT:
"""RT utility wrapper class
@ -38,7 +163,9 @@ class RT:
* Parse links
* Verify that they refer to extant objects in RT
* Convert metadata links to RT web links
* Cache results, to reduce network requests
* Cache results, to reduce network requests.
You can set up an RTLinkCache to cache links to disks over multiple runs.
Refer to RTLinkCache's docstring for details and instructions.
"""
PARSE_REGEXPS = [
@ -46,8 +173,7 @@ class RT:
re.compile(r'^rt://ticket/([0-9]+)(?:/attachments?/([0-9]+))?/?$'),
]
def __init__(self, rt_client: rt.Rt) -> None:
self.rt = rt_client
def __init__(self, rt_client: rt.Rt, cache_db: Optional[sqlite3.Connection]=None) -> None:
urlparts = urlparse.urlparse(rt_client.url)
try:
index = urlparts.path.rindex('/REST/')
@ -56,6 +182,32 @@ class RT:
else:
base_path = urlparts.path[:index + 1]
self.url_base = urlparts._replace(path=base_path)
self.rt = rt_client
self._cache: _LinkCache
if cache_db is None:
self._cache = {}
else:
self._cache = RTLinkCache(cache_db)
# mypy complains that the first argument isn't self, but this isn't meant
# to be a method, it's just an internal decrator.
def _cache_method(func: Callable) -> Callable: # type:ignore[misc]
@functools.wraps(func)
def caching_wrapper(self,
ticket_id: RTId,
attachment_id: Optional[RTId]=None,
) -> str:
cache_key = (str(ticket_id), attachment_id and str(attachment_id))
try:
url = self._cache[cache_key]
except KeyError:
if attachment_id is None:
url = func(self, ticket_id)
else:
url = func(self, ticket_id, attachment_id)
self._cache[cache_key] = url
return url
return caching_wrapper
def _extend_url(self,
path_tail: str,
@ -84,7 +236,7 @@ class RT:
fragment = 'txn-{}'.format(txn_id)
return self._extend_url('Ticket/Display.html', fragment, id=str(ticket_id))
@functools.lru_cache()
@_cache_method
def attachment_url(self, ticket_id: RTId, attachment_id: RTId) -> Optional[str]:
attachment = self.rt.get_attachment(ticket_id, attachment_id)
if attachment is None:
@ -118,7 +270,7 @@ class RT:
return (ticket_id, attachment_id)
return None
@functools.lru_cache()
@_cache_method
def ticket_url(self, ticket_id: RTId) -> Optional[str]:
if self.rt.get_ticket(ticket_id) is None:
return None

View file

@ -207,6 +207,22 @@ def test_rt_wrapper_cache_responds_to_external_credential_changes(rt_environ):
rt2 = config.rt_wrapper(None, testutil.RTClient)
assert rt1 is not rt2
def test_rt_wrapper_has_cache(tmp_path):
with update_environ(XDG_CACHE_DIR=tmp_path):
config = config_mod.Config()
rt = config.rt_wrapper(None, testutil.RTClient)
rt.exists(1)
expected = 'conservancy_beancount/{}@*.sqlite3'.format(RT_FILE_CREDS[1])
assert any(tmp_path.glob(expected))
def test_rt_wrapper_without_cache(tmp_path):
tmp_path.chmod(0)
with update_environ(XDG_CACHE_DIR=tmp_path):
config = config_mod.Config()
rt = config.rt_wrapper(None, testutil.RTClient)
tmp_path.chmod(0o600)
assert not any(tmp_path.iterdir())
def test_cache_mkdir(tmp_path):
expected = tmp_path / 'TESTcache'
with update_environ(XDG_CACHE_DIR=tmp_path):

View file

@ -14,6 +14,8 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import contextlib
import pytest
from . import testutil
@ -46,6 +48,14 @@ def new_client():
TICKET_DATA = testutil.RTClient.TICKET_DATA.copy()
return RTClient()
def new_cache(database=':memory:'):
db = rtutil.RTLinkCache.setup(database)
if db is None:
print("NOTE: did not set up database cache at {}".format(database))
return contextlib.nullcontext(db)
else:
return contextlib.closing(db)
@pytest.mark.parametrize('ticket_id,attachment_id,expected', EXPECTED_URLS)
def test_url(rt, ticket_id, attachment_id, expected):
if expected is not None:
@ -125,3 +135,47 @@ def test_uncommon_server_url_parsing():
client = testutil.RTClient(url + 'REST/1.0/')
rt = rtutil.RT(client)
assert rt.url(1).startswith(url)
def test_shared_cache(new_client):
ticket_id, _, expected = EXPECTED_URLS[0]
expected = DEFAULT_RT_URL + expected
with new_cache() as cachedb:
rt1 = rtutil.RT(new_client, cachedb)
assert rt1.url(ticket_id) == expected
new_client.TICKET_DATA.clear()
rt2 = rtutil.RT(new_client, cachedb)
assert rt2.url(ticket_id) == expected
assert not rt2.exists(ticket_id + 1)
assert rt1 is not rt2
def test_no_shared_cache(new_client):
with new_cache() as cache1, new_cache() as cache2:
rt1 = rtutil.RT(new_client, cache1)
rt2 = rtutil.RT(new_client, cache2)
assert rt1.exists(1)
new_client.TICKET_DATA.clear()
assert not rt2.exists(1)
assert rt1.exists(1)
def test_read_only_cache(new_client, tmp_path):
db_path = tmp_path / 'test.db'
ticket_id, _, expected = EXPECTED_URLS[0]
expected = DEFAULT_RT_URL + expected
with new_cache(db_path) as cache1:
rt1 = rtutil.RT(new_client, cache1)
assert rt1.url(ticket_id) == expected
new_client.TICKET_DATA.clear()
db_path.chmod(0o400)
with new_cache(db_path) as cache2:
rt2 = rtutil.RT(new_client, cache2)
assert rt2.url(ticket_id) == expected
assert rt2.url(ticket_id + 1) is None
def test_results_not_found_only_in_transient_cache(new_client):
with new_cache() as cache:
rt1 = rtutil.RT(new_client, cache)
rt2 = rtutil.RT(new_client, cache)
assert not rt1.exists(9)
new_client.TICKET_DATA['9'] = [('99', '(Unnamed)', 'text/plain', '0b')]
assert not rt1.exists(9)
assert rt2.exists(9)