mirror of
https://github.com/valitydev/redash.git
synced 2024-11-07 01:25:16 +00:00
Job to clean up unused query results
This commit is contained in:
parent
2278a181ca
commit
e116e88e98
@ -100,6 +100,7 @@ def status_api():
|
||||
status['version'] = __version__
|
||||
status['queries_count'] = models.Query.select().count()
|
||||
status['query_results_count'] = models.QueryResult.select().count()
|
||||
status['unused_query_results_count'] = models.QueryResult.unused().count()
|
||||
status['dashboards_count'] = models.Dashboard.select().count()
|
||||
status['widgets_count'] = models.Widget.select().count()
|
||||
|
||||
|
@ -225,6 +225,15 @@ class QueryResult(BaseModel):
|
||||
'retrieved_at': self.retrieved_at
|
||||
}
|
||||
|
||||
@classmethod
def unused(cls):
    """Return query results that nothing references and that are over a week old.

    A result qualifies when no Query row points at it (the LEFT OUTER join
    finds no match, so Query.id is NULL) and its retrieved_at timestamp is
    more than 7 days in the past.
    """
    cutoff = datetime.datetime.now() - datetime.timedelta(days=7)

    # NOTE: peewee needs `== None` to build an IS NULL predicate;
    # `is None` would not work here.
    return (cls.select()
            .where(Query.id == None, cls.retrieved_at < cutoff)
            .join(Query, join_type=peewee.JOIN_LEFT_OUTER))
|
||||
|
||||
@classmethod
|
||||
def get_latest(cls, data_source, query, ttl=0):
|
||||
query_hash = utils.gen_query_hash(query)
|
||||
|
@ -56,6 +56,10 @@ CELERY_BROKER = os.environ.get("REDASH_CELERY_BROKER", REDIS_URL)
|
||||
# Celery result backend; defaults to the same Redis instance used as the broker.
CELERY_BACKEND = os.environ.get("REDASH_CELERY_BACKEND", REDIS_URL)
# URL path for the Celery Flower monitoring UI -- presumably where it is mounted; confirm against routing.
CELERY_FLOWER_URL = os.environ.get("REDASH_CELERY_FLOWER_URL", "/flower")

# The following enables a periodic job (every 5 minutes) that removes unused query results.
# Kept behind this "feature flag" (default off) until proved to be "safe".
QUERY_RESULTS_CLEANUP_ENABLED = parse_boolean(os.environ.get("REDASH_QUERY_RESULTS_CLEANUP_ENABLED", "false"))

# Google Apps domain to allow access from; any user with an email in this Google Apps domain
# will be allowed access.
GOOGLE_APPS_DOMAIN = os.environ.get("REDASH_GOOGLE_APPS_DOMAIN", "")
|
||||
|
@ -202,6 +202,22 @@ def cleanup_tasks():
|
||||
redis_connection.delete(lock_keys[i])
|
||||
|
||||
|
||||
@celery.task(base=BaseTask)
def cleanup_query_results():
    """
    Job to clean up unused query results -- those no query links to anymore, and older than a week (so it's less
    likely to be open in someone's browser and be used).

    Each run the job deletes only 100 query results so it won't choke the database in case of many such results.
    """
    # Batch of at most 100 orphaned results to delete this run.
    unused_query_results = models.QueryResult.unused().limit(100)
    # Full backlog size is logged so operators can tell how much work remains.
    total_unused_query_results = models.QueryResult.unused().count()
    deleted_count = models.QueryResult.delete().where(models.QueryResult.id << unused_query_results).execute()

    # Lazy %-args (not eager `%` formatting): the message is only rendered when INFO logging is enabled.
    logger.info("Deleted %d unused query results out of total of %d.", deleted_count, total_unused_query_results)
|
||||
|
||||
|
||||
@celery.task(bind=True, base=BaseTask, track_started=True)
|
||||
def execute_query(self, query, data_source_id):
|
||||
# TODO: maybe this should be a class?
|
||||
|
@ -7,19 +7,26 @@ celery = Celery('redash',
|
||||
broker=settings.CELERY_BROKER,
|
||||
include='redash.tasks')
|
||||
|
||||
celery.conf.update(CELERY_RESULT_BACKEND=settings.CELERY_BACKEND,
|
||||
CELERYBEAT_SCHEDULE={
|
||||
'refresh_queries': {
|
||||
'task': 'redash.tasks.refresh_queries',
|
||||
'schedule': timedelta(seconds=30)
|
||||
},
|
||||
'cleanup_tasks': {
|
||||
'task': 'redash.tasks.cleanup_tasks',
|
||||
'schedule': timedelta(minutes=5)
|
||||
},
|
||||
},
|
||||
CELERY_TIMEZONE='UTC')
|
||||
# Celery beat schedule: periodic background jobs and their intervals.
celery_schedule = {
    # Re-run queries whose scheduled refresh is due.
    'refresh_queries': {
        'task': 'redash.tasks.refresh_queries',
        'schedule': timedelta(seconds=30)
    },
    # Housekeeping for stale in-progress task bookkeeping.
    'cleanup_tasks': {
        'task': 'redash.tasks.cleanup_tasks',
        'schedule': timedelta(minutes=5)
    }
}

# Query-results cleanup is feature-flagged in settings; only schedule it when enabled.
if settings.QUERY_RESULTS_CLEANUP_ENABLED:
    celery_schedule['cleanup_query_results'] = {
        'task': 'redash.tasks.cleanup_query_results',
        'schedule': timedelta(minutes=5)
    }

celery.conf.update(CELERY_RESULT_BACKEND=settings.CELERY_BACKEND,
                   CELERYBEAT_SCHEDULE=celery_schedule,
                   CELERY_TIMEZONE='UTC')

if __name__ == '__main__':
    celery.start()
|
@ -129,6 +129,25 @@ class QueryResultTest(BaseTestCase):
|
||||
self.assertEqual(found_query_result.id, qr.id)
|
||||
|
||||
|
||||
class TestUnusedQueryResults(BaseTestCase):
    """Exercise QueryResult.unused(): only orphaned, week-old results qualify."""

    def test_returns_only_unused_query_results(self):
        stale_date = datetime.datetime.now() - datetime.timedelta(days=14)
        # A result a query still points at -- must never be reported as unused.
        referenced_qr = query_result_factory.create()
        query_factory.create(latest_query_data=referenced_qr)
        # A stale result no query references -- should be reported.
        orphan_qr = query_result_factory.create(retrieved_at=stale_date)

        self.assertIn(orphan_qr, models.QueryResult.unused())
        self.assertNotIn(referenced_qr, models.QueryResult.unused())

    def test_returns_only_over_a_week_old_results(self):
        stale_date = datetime.datetime.now() - datetime.timedelta(days=14)
        old_orphan = query_result_factory.create(retrieved_at=stale_date)
        # Orphaned but freshly retrieved -- too recent to be considered unused.
        fresh_orphan = query_result_factory.create()

        self.assertIn(old_orphan, models.QueryResult.unused())
        self.assertNotIn(fresh_orphan, models.QueryResult.unused())
|
||||
|
||||
|
||||
class TestQueryResultStoreResult(BaseTestCase):
|
||||
def setUp(self):
|
||||
super(TestQueryResultStoreResult, self).setUp()
|
||||
|
Loading…
Reference in New Issue
Block a user