diff --git a/development/fossology/reports/20250114_sql_queries_to_undo_reuser.md b/development/fossology/reports/20250114_sql_queries_to_undo_reuser.md new file mode 100644 index 0000000000000000000000000000000000000000..0ad083448841bfbaeb9ccc2012d87e28dc10148c --- /dev/null +++ b/development/fossology/reports/20250114_sql_queries_to_undo_reuser.md @@ -0,0 +1,214 @@ +--- +title: SQL queries to undo reuser agent +author: Alberto Pianon <pianon@array.eu> +date: "2025-01-14" +lastmod: "2025-01-14" +SPDX-License-Identifier: CC-BY-SA-4.0 AND LicenseRef-Trademark-Notice +SPDX-FileCopyrightText: Alberto Pianon <pianon@array.eu> +--- + +We faced the problem of undoing a reuser agent's run for a specific upload in Fossology. The reuser agent had been automatically scheduled on that specific upload, reusing bulk rules and individual file conclusions from a previuos similar upload, but it turned out that such bulk rules and conclusions were not consistent with the current audit policy adopted for the project. + +Unfortunately, Fossology does not provide any undo functionality, so the data must be manually deleted from fossology's db, while preserving its consistency. + +In this specific case, not only the relevant clearing decisions and events, but also the monkbulk and decider jobs that generated them need to be deleted, otherwise Fossology will still list them in the bulk history and that may lead to inconsistencies and possible further unforeseable consequences. + +Since the only the clearing events table contains the event type (`BULK` or `USER`) that is needed to filter the items to delete, item in such table must be deleted last. + +Here are the queries to delete conclusions from reused bulk rules (`{upload_id}` is the upload id, while {uploadtree_tablename} is the uploadtree table name for that upload, which can be retrieved in the upload table - generally it's "uploadtree_a", but for very large uploads a dedicated table may be created, usually named "uploadtree_<upload_id>") + +```sql +-- 1 delete jobdepends +DELETE FROM jobdepends jd +WHERE jd.jdep_jq_depends_fk IN ( + SELECT jq.jq_pk FROM jobqueue jq + WHERE + jq.jq_type = 'monkbulk' + AND jq.jq_job_fk IN ( + SELECT DISTINCT ce.job_fk + FROM clearing_event ce, {uploadtree_tablename} ut + WHERE + ut.uploadtree_pk = ce.uploadtree_fk + AND ce.type_fk = 2 -- ClearingEventTypes::BULK + AND ce.user_fk = 3 -- user 'fossy', under which reuser agent had been scheduled + AND ut.upload_fk = {upload_id} + AND ce.job_fk IS NOT NULL + ) +); + +-- 2 delete jobqueues +DELETE FROM jobqueue jq +WHERE + jq.jq_job_fk IN ( + SELECT DISTINCT ce.job_fk + FROM clearing_event ce, {uploadtree_tablename} ut + WHERE + ut.uploadtree_pk = ce.uploadtree_fk + AND ce.type_fk = 2 -- ClearingEventTypes::BULK + AND ce.user_fk = 3 -- user 'fossy', under which reuser agent had been scheduled + AND ut.upload_fk = {upload_id} + AND ce.job_fk IS NOT NULL + ) +; + +-- 3 delete jobs + +DELETE FROM job +WHERE + job.job_pk IN ( + SELECT DISTINCT ce.job_fk + FROM clearing_event ce, {uploadtree_tablename} ut + WHERE + ut.uploadtree_pk = ce.uploadtree_fk + AND ce.type_fk = 2 -- ClearingEventTypes::BULK + AND ce.user_fk = 3 -- user 'fossy', under which reuser agent had been scheduled + AND ut.upload_fk = {upload_id} + AND ce.job_fk IS NOT NULL + ) +; + + +-- 4 delete clearing decisions +DELETE FROM clearing_decision cd WHERE cd.clearing_decision_pk IN +( + SELECT DISTINCT cde.clearing_decision_fk + FROM clearing_decision_event cde + WHERE cde.clearing_event_fk IN + ( + SELECT DISTINCT ce.clearing_event_pk + FROM clearing_event ce, {uploadtree_tablename} ut + WHERE + ut.uploadtree_pk = ce.uploadtree_fk + AND ce.type_fk = 2 -- ClearingEventTypes::BULK + AND ce.user_fk = 3 -- user 'fossy', under which reuser agent had been scheduled + AND ut.upload_fk = {upload_id} + ) +); + +-- 5 delete entries in clearing decision event table + +DELETE FROM clearing_decision_event cde +WHERE cde.clearing_event_fk IN +( + SELECT DISTINCT ce.clearing_event_pk + FROM clearing_event ce, {uploadtree_tablename} ut + WHERE + ut.uploadtree_pk = ce.uploadtree_fk + AND ce.type_fk = 2 -- ClearingEventTypes::BULK + AND ce.user_fk = 3 -- user 'fossy', under which reuser agent had been scheduled + AND ut.upload_fk = {upload_id} +); + +-- 6 delete entries in clearing event table +DELETE FROM clearing_event ce1 +WHERE ce1.clearing_event_pk IN +( + SELECT DISTINCT ce.clearing_event_pk + FROM clearing_event ce, {uploadtree_tablename} ut + WHERE + ut.uploadtree_pk = ce.uploadtree_fk + AND ce.type_fk = 2 -- ClearingEventTypes::BULK + AND ce.user_fk = 3 -- user 'fossy', under which reuser agent had been scheduled + AND ut.upload_fk = {upload_id} +); + +-- 7 delete reportimport_ars records for upload +DELETE FROM monkbulk_ars WHERE upload_fk = {upload_id}; +``` + + +Here are the queries to delete the reused individual file decisions: + + +```sql +-- 1 delete jobdepends +DELETE FROM jobdepends jd +WHERE jd.jdep_jq_depends_fk IN ( + SELECT jq.jq_pk FROM jobqueue jq + WHERE + jq.jq_type = 'reuser' + AND jq.jq_job_fk IN ( + SELECT DISTINCT ce.job_fk + FROM clearing_event ce, {uploadtree_tablename} ut + WHERE + ut.uploadtree_pk = ce.uploadtree_fk + AND ce.type_fk = 1 -- ClearingEventTypes::USER + AND ce.user_fk = 3 -- user 'fossy', under which reuser agent had been scheduled + AND ut.upload_fk = {upload_id} + AND ce.job_fk IS NOT NULL + ) +); +DELETE FROM jobdepends jd +WHERE jd.jdep_jq_fk IN ( + SELECT jq.jq_pk FROM jobqueue jq + WHERE + jq.jq_type = 'reuser' + AND jq.jq_job_fk IN ( + SELECT DISTINCT ce.job_fk + FROM clearing_event ce, {uploadtree_tablename} ut + WHERE + ut.uploadtree_pk = ce.uploadtree_fk + AND ce.type_fk = 1 -- ClearingEventTypes::USER + AND ce.user_fk = 3 -- user 'fossy', under which reuser agent had been scheduled + AND ut.upload_fk = {upload_id} + AND ce.job_fk IS NOT NULL + ) +); + +-- 2 delete jobqueues +DELETE FROM jobqueue jq1 +WHERE jq1.jq_pk IN ( + SELECT jq.jq_pk FROM jobqueue jq, job + WHERE + jq.jq_type IN ('reuser', 'decider') + AND jq.jq_job_fk = job.job_pk + AND job.job_upload_fk = {upload_id} +) +; + +-- 3 delete clearing decisions +DELETE FROM clearing_decision cd WHERE cd.clearing_decision_pk IN +( + SELECT DISTINCT cde.clearing_decision_fk + FROM clearing_decision_event cde + WHERE cde.clearing_event_fk IN + ( + SELECT DISTINCT ce.clearing_event_pk + FROM clearing_event ce, {uploadtree_tablename} ut + WHERE + ut.uploadtree_pk = ce.uploadtree_fk + AND ce.type_fk = 1 -- ClearingEventTypes::USER + AND ce.user_fk = 3 -- user 'fossy', under which reuser agent had been scheduled + AND ut.upload_fk = {upload_id} + ) +); + +-- 4 delete entries in clearing decision event table +DELETE FROM clearing_decision_event cde +WHERE cde.clearing_event_fk IN +( + SELECT DISTINCT ce.clearing_event_pk + FROM clearing_event ce, {uploadtree_tablename} ut + WHERE + ut.uploadtree_pk = ce.uploadtree_fk + AND ce.type_fk = 1 -- ClearingEventTypes::USER + AND ce.user_fk = 3 -- user 'fossy', under which reuser agent had been scheduled + AND ut.upload_fk = {upload_id} +); + +-- 5 delete entries in clearing event table +DELETE FROM clearing_event ce1 +WHERE ce1.clearing_event_pk IN +( + SELECT DISTINCT ce.clearing_event_pk + FROM clearing_event ce, {uploadtree_tablename} ut + WHERE + ut.uploadtree_pk = ce.uploadtree_fk + AND ce.type_fk = 1 -- ClearingEventTypes::USER + AND ce.user_fk = 3 -- user 'fossy', under which reuser agent had been scheduled + AND ut.upload_fk = {upload_id} +); + +-- 7 delete reportimport_ars records for upload +DELETE FROM reuser_ars WHERE upload_fk = {upload_id}; +``` \ No newline at end of file