rtutil: Add RTLinkCache class to cache links to disk.
This will greatly reduce RT requests across multiple runs and speed up link checking/conversion.
This commit is contained in:
parent
f227593655
commit
a8407c7b6a
4 changed files with 236 additions and 6 deletions
|
@ -126,8 +126,16 @@ class Config:
|
|||
wrapper_client = self.rt_client(credentials, client)
|
||||
if wrapper_client is None:
|
||||
return None
|
||||
cache_dir_path = self.cache_dir_path()
|
||||
if cache_dir_path is None:
|
||||
cache_db = None
|
||||
else:
|
||||
return rtutil.RT(wrapper_client)
|
||||
cache_name = '{}@{}.sqlite3'.format(
|
||||
credentials.user,
|
||||
urlparse.quote(str(credentials.server), ''),
|
||||
)
|
||||
cache_db = rtutil.RTLinkCache.setup(cache_dir_path / cache_name)
|
||||
return rtutil.RT(wrapper_client, cache_db)
|
||||
|
||||
def rt_wrapper(self,
|
||||
credentials: RTCredentials=None,
|
||||
|
|
|
@ -17,17 +17,142 @@
|
|||
import functools
|
||||
import mimetypes
|
||||
import re
|
||||
import sqlite3
|
||||
import urllib.parse as urlparse
|
||||
|
||||
import rt
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from typing import (
|
||||
Callable,
|
||||
Iterator,
|
||||
MutableMapping,
|
||||
Optional,
|
||||
Set,
|
||||
Tuple,
|
||||
Union,
|
||||
)
|
||||
|
||||
RTId = Union[int, str]
|
||||
TicketAttachmentIds = Tuple[str, Optional[str]]
|
||||
_LinkCache = MutableMapping[TicketAttachmentIds, Optional[str]]
|
||||
|
||||
class RTLinkCache(_LinkCache):
|
||||
"""Cache RT links to disk
|
||||
|
||||
This class provides a dict-like interface to a cache of RT links.
|
||||
Once an object is in RT, a link to it should never change.
|
||||
The only exception is when objects get shredded, and those objects
|
||||
shouldn't be referenced in books anyway.
|
||||
|
||||
This implementation is backed by a sqlite database. You can call::
|
||||
|
||||
db = RTLinkCache.setup(path)
|
||||
|
||||
This method will try to open a sqlite database at the given path,
|
||||
and set up necessary tables, etc.
|
||||
If it succeeds, it returns a database connection you can use to
|
||||
initialize the cache.
|
||||
If it fails, it returns None, and the caller should use some other
|
||||
dict-like object (like a normal dict) for caching.
|
||||
You can give the result to the RT utility class either way,
|
||||
and it will do the right thing for itself::
|
||||
|
||||
rt = RT(rt_client, db)
|
||||
"""
|
||||
|
||||
CREATE_TABLE_SQL = """CREATE TABLE IF NOT EXISTS RTLinkCache(
|
||||
ticket_id TEXT NOT NULL,
|
||||
attachment_id TEXT,
|
||||
url TEXT NOT NULL,
|
||||
PRIMARY KEY (ticket_id, attachment_id)
|
||||
)"""
|
||||
|
||||
@classmethod
|
||||
def setup(cls, cache_path: Path) -> Optional[sqlite3.Connection]:
|
||||
try:
|
||||
db = sqlite3.connect(cache_path, isolation_level=None)
|
||||
cursor = db.cursor()
|
||||
cursor.execute(cls.CREATE_TABLE_SQL)
|
||||
cursor.execute('SELECT url FROM RTLinkCache LIMIT 1')
|
||||
have_data = cursor.fetchone() is not None
|
||||
except sqlite3.OperationalError:
|
||||
# If we couldn't get this far, sqlite provides no benefit.
|
||||
return None
|
||||
try:
|
||||
# There shouldn't be any records where url is NULL, so running this
|
||||
# DELETE pulls double duty for us: it tells us whether or not we
|
||||
# can write to the database and it enforces database integrity.
|
||||
cursor.execute('DELETE FROM RTLinkCache WHERE url IS NULL')
|
||||
except sqlite3.OperationalError:
|
||||
can_write = False
|
||||
else:
|
||||
can_write = True
|
||||
print(cache_path, have_data, can_write)
|
||||
if not (can_write or have_data):
|
||||
# If there's nothing to read and no way to write, sqlite provides
|
||||
# no benefit.
|
||||
return None
|
||||
elif not can_write:
|
||||
# Set up an in-memory database that we can write to, seeded with
|
||||
# the data available to read.
|
||||
try:
|
||||
cursor.close()
|
||||
db.close()
|
||||
db = sqlite3.connect(':memory:', isolation_level=None)
|
||||
cursor = db.cursor()
|
||||
cursor.execute('ATTACH DATABASE ? AS readsource',
|
||||
('{}?mode=ro'.format(cache_path.as_uri()),))
|
||||
cursor.execute(cls.CREATE_TABLE_SQL)
|
||||
cursor.execute('INSERT INTO RTLinkCache SELECT * FROM readsource.RTLinkCache')
|
||||
cursor.execute('DETACH DATABASE readsource')
|
||||
except sqlite3.OperationalError as error:
|
||||
# We're back to the case of having nothing to read and no way
|
||||
# to write.
|
||||
return None
|
||||
cursor.close()
|
||||
db.commit()
|
||||
return db
|
||||
|
||||
def __init__(self, cache_db: sqlite3.Connection) -> None:
|
||||
self._db = cache_db
|
||||
self._nourls: Set[TicketAttachmentIds] = set()
|
||||
|
||||
def __iter__(self) -> Iterator[TicketAttachmentIds]:
|
||||
yield from self._db.execute('SELECT ticket_id, attachment_id FROM RTLinkCache')
|
||||
yield from self._nourls
|
||||
|
||||
def __len__(self) -> int:
|
||||
cursor = self._db.execute('SELECT COUNT(*) FROM RTLinkCache')
|
||||
return cursor.fetchone()[0] + len(self._nourls)
|
||||
|
||||
def __getitem__(self, key: TicketAttachmentIds) -> Optional[str]:
|
||||
if key in self._nourls:
|
||||
return None
|
||||
cursor = self._db.execute(
|
||||
'SELECT url FROM RTLinkCache WHERE ticket_id = ? AND attachment_id IS ?',
|
||||
key,
|
||||
)
|
||||
retval = cursor.fetchone()
|
||||
if retval is None:
|
||||
raise KeyError(key)
|
||||
else:
|
||||
return retval[0]
|
||||
|
||||
def __setitem__(self, key: TicketAttachmentIds, value: Optional[str]) -> None:
|
||||
if value is None:
|
||||
self._nourls.add(key)
|
||||
else:
|
||||
ticket_id, attachment_id = key
|
||||
self._db.execute(
|
||||
'INSERT INTO RTLinkCache VALUES(?, ?, ?)',
|
||||
(ticket_id, attachment_id, value),
|
||||
)
|
||||
|
||||
def __delitem__(self, key: TicketAttachmentIds) -> None:
|
||||
raise NotImplementedError("RTLinkCache.__delitem__")
|
||||
|
||||
|
||||
class RT:
|
||||
"""RT utility wrapper class
|
||||
|
@ -38,7 +163,9 @@ class RT:
|
|||
* Parse links
|
||||
* Verify that they refer to extant objects in RT
|
||||
* Convert metadata links to RT web links
|
||||
* Cache results, to reduce network requests
|
||||
* Cache results, to reduce network requests.
|
||||
You can set up an RTLinkCache to cache links to disks over multiple runs.
|
||||
Refer to RTLinkCache's docstring for details and instructions.
|
||||
"""
|
||||
|
||||
PARSE_REGEXPS = [
|
||||
|
@ -46,8 +173,7 @@ class RT:
|
|||
re.compile(r'^rt://ticket/([0-9]+)(?:/attachments?/([0-9]+))?/?$'),
|
||||
]
|
||||
|
||||
def __init__(self, rt_client: rt.Rt) -> None:
|
||||
self.rt = rt_client
|
||||
def __init__(self, rt_client: rt.Rt, cache_db: Optional[sqlite3.Connection]=None) -> None:
|
||||
urlparts = urlparse.urlparse(rt_client.url)
|
||||
try:
|
||||
index = urlparts.path.rindex('/REST/')
|
||||
|
@ -56,6 +182,32 @@ class RT:
|
|||
else:
|
||||
base_path = urlparts.path[:index + 1]
|
||||
self.url_base = urlparts._replace(path=base_path)
|
||||
self.rt = rt_client
|
||||
self._cache: _LinkCache
|
||||
if cache_db is None:
|
||||
self._cache = {}
|
||||
else:
|
||||
self._cache = RTLinkCache(cache_db)
|
||||
|
||||
# mypy complains that the first argument isn't self, but this isn't meant
|
||||
# to be a method, it's just an internal decrator.
|
||||
def _cache_method(func: Callable) -> Callable: # type:ignore[misc]
|
||||
@functools.wraps(func)
|
||||
def caching_wrapper(self,
|
||||
ticket_id: RTId,
|
||||
attachment_id: Optional[RTId]=None,
|
||||
) -> str:
|
||||
cache_key = (str(ticket_id), attachment_id and str(attachment_id))
|
||||
try:
|
||||
url = self._cache[cache_key]
|
||||
except KeyError:
|
||||
if attachment_id is None:
|
||||
url = func(self, ticket_id)
|
||||
else:
|
||||
url = func(self, ticket_id, attachment_id)
|
||||
self._cache[cache_key] = url
|
||||
return url
|
||||
return caching_wrapper
|
||||
|
||||
def _extend_url(self,
|
||||
path_tail: str,
|
||||
|
@ -84,7 +236,7 @@ class RT:
|
|||
fragment = 'txn-{}'.format(txn_id)
|
||||
return self._extend_url('Ticket/Display.html', fragment, id=str(ticket_id))
|
||||
|
||||
@functools.lru_cache()
|
||||
@_cache_method
|
||||
def attachment_url(self, ticket_id: RTId, attachment_id: RTId) -> Optional[str]:
|
||||
attachment = self.rt.get_attachment(ticket_id, attachment_id)
|
||||
if attachment is None:
|
||||
|
@ -118,7 +270,7 @@ class RT:
|
|||
return (ticket_id, attachment_id)
|
||||
return None
|
||||
|
||||
@functools.lru_cache()
|
||||
@_cache_method
|
||||
def ticket_url(self, ticket_id: RTId) -> Optional[str]:
|
||||
if self.rt.get_ticket(ticket_id) is None:
|
||||
return None
|
||||
|
|
|
@ -207,6 +207,22 @@ def test_rt_wrapper_cache_responds_to_external_credential_changes(rt_environ):
|
|||
rt2 = config.rt_wrapper(None, testutil.RTClient)
|
||||
assert rt1 is not rt2
|
||||
|
||||
def test_rt_wrapper_has_cache(tmp_path):
|
||||
with update_environ(XDG_CACHE_DIR=tmp_path):
|
||||
config = config_mod.Config()
|
||||
rt = config.rt_wrapper(None, testutil.RTClient)
|
||||
rt.exists(1)
|
||||
expected = 'conservancy_beancount/{}@*.sqlite3'.format(RT_FILE_CREDS[1])
|
||||
assert any(tmp_path.glob(expected))
|
||||
|
||||
def test_rt_wrapper_without_cache(tmp_path):
|
||||
tmp_path.chmod(0)
|
||||
with update_environ(XDG_CACHE_DIR=tmp_path):
|
||||
config = config_mod.Config()
|
||||
rt = config.rt_wrapper(None, testutil.RTClient)
|
||||
tmp_path.chmod(0o600)
|
||||
assert not any(tmp_path.iterdir())
|
||||
|
||||
def test_cache_mkdir(tmp_path):
|
||||
expected = tmp_path / 'TESTcache'
|
||||
with update_environ(XDG_CACHE_DIR=tmp_path):
|
||||
|
|
|
@ -14,6 +14,8 @@
|
|||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
import contextlib
|
||||
|
||||
import pytest
|
||||
|
||||
from . import testutil
|
||||
|
@ -46,6 +48,14 @@ def new_client():
|
|||
TICKET_DATA = testutil.RTClient.TICKET_DATA.copy()
|
||||
return RTClient()
|
||||
|
||||
def new_cache(database=':memory:'):
|
||||
db = rtutil.RTLinkCache.setup(database)
|
||||
if db is None:
|
||||
print("NOTE: did not set up database cache at {}".format(database))
|
||||
return contextlib.nullcontext(db)
|
||||
else:
|
||||
return contextlib.closing(db)
|
||||
|
||||
@pytest.mark.parametrize('ticket_id,attachment_id,expected', EXPECTED_URLS)
|
||||
def test_url(rt, ticket_id, attachment_id, expected):
|
||||
if expected is not None:
|
||||
|
@ -125,3 +135,47 @@ def test_uncommon_server_url_parsing():
|
|||
client = testutil.RTClient(url + 'REST/1.0/')
|
||||
rt = rtutil.RT(client)
|
||||
assert rt.url(1).startswith(url)
|
||||
|
||||
def test_shared_cache(new_client):
|
||||
ticket_id, _, expected = EXPECTED_URLS[0]
|
||||
expected = DEFAULT_RT_URL + expected
|
||||
with new_cache() as cachedb:
|
||||
rt1 = rtutil.RT(new_client, cachedb)
|
||||
assert rt1.url(ticket_id) == expected
|
||||
new_client.TICKET_DATA.clear()
|
||||
rt2 = rtutil.RT(new_client, cachedb)
|
||||
assert rt2.url(ticket_id) == expected
|
||||
assert not rt2.exists(ticket_id + 1)
|
||||
assert rt1 is not rt2
|
||||
|
||||
def test_no_shared_cache(new_client):
|
||||
with new_cache() as cache1, new_cache() as cache2:
|
||||
rt1 = rtutil.RT(new_client, cache1)
|
||||
rt2 = rtutil.RT(new_client, cache2)
|
||||
assert rt1.exists(1)
|
||||
new_client.TICKET_DATA.clear()
|
||||
assert not rt2.exists(1)
|
||||
assert rt1.exists(1)
|
||||
|
||||
def test_read_only_cache(new_client, tmp_path):
|
||||
db_path = tmp_path / 'test.db'
|
||||
ticket_id, _, expected = EXPECTED_URLS[0]
|
||||
expected = DEFAULT_RT_URL + expected
|
||||
with new_cache(db_path) as cache1:
|
||||
rt1 = rtutil.RT(new_client, cache1)
|
||||
assert rt1.url(ticket_id) == expected
|
||||
new_client.TICKET_DATA.clear()
|
||||
db_path.chmod(0o400)
|
||||
with new_cache(db_path) as cache2:
|
||||
rt2 = rtutil.RT(new_client, cache2)
|
||||
assert rt2.url(ticket_id) == expected
|
||||
assert rt2.url(ticket_id + 1) is None
|
||||
|
||||
def test_results_not_found_only_in_transient_cache(new_client):
|
||||
with new_cache() as cache:
|
||||
rt1 = rtutil.RT(new_client, cache)
|
||||
rt2 = rtutil.RT(new_client, cache)
|
||||
assert not rt1.exists(9)
|
||||
new_client.TICKET_DATA['9'] = [('99', '(Unnamed)', 'text/plain', '0b')]
|
||||
assert not rt1.exists(9)
|
||||
assert rt2.exists(9)
|
||||
|
|
Loading…
Reference in a new issue