Issue #1020
closed: Scheduled Tasks failure_threshold doesn't work
Description
The scheduler's failure_threshold
attribute doesn't work --- a scheduled task that fails consecutively more times than the threshold allows doesn't get cancelled automatically, contrary to what the documentation specifies.
How reproducible: Always
Steps to reproduce
- create a scheduled unit installation with a malformed rpm unit key with a period of 1 minute
- use the fake automation consumer to report back the related installation error
- wait 5 or more minutes
- check the number of tasks that were spawned for the consumer installation
- check the
total_run_count
of the scheduler object
Actual result:
- the total count of tasks spawned for the installation is higher than the
failure_threshold
scheduler attribute would imply
- the
consecutive_failures
scheduler attribute remains zero all the time
Expected results
- Once the number of consecutively failing spawned tasks reaches the
failure_threshold
scheduler attribute value, the scheduler is disabled, as specified in the documentation
- the
consecutive_failures
scheduler attribute should eventually reach the
failure_threshold
attribute value
- The scheduler gets cancelled automatically when
consecutive_failures
reaches the
failure_threshold
attribute value
See also: Issue #1019
Reproducer screen log
The scheduled installation object
[root@ip-172-31-20-145 ~]# curl -k https://admin:admin@localhost/pulp/api/v2/consumers/SimpleScheduledInstall_consumer/schedules/content/install/ | python -mjson.tool
[
{
"_href": "/pulp/api/v2/consumers/SimpleScheduledInstall_consumer/schedules/content/install/556c6a642bcb1c455d5ce1ef/",
"_id": "556c6a642bcb1c455d5ce1ef",
"args": [
"SimpleScheduledInstall_consumer"
],
"consecutive_failures": 0,
"enabled": true,
"failure_threshold": 3,
"first_run": "2015-06-01T14:21:24Z",
"kwargs": {
"options": null,
"units": [
{
"type_id": "rpm",
"unit_key": "zebra"
}
]
},
"last_run_at": "2015-06-01T14:22:02Z",
"last_updated": 1433168484.7911601,
"next_run": "2015-06-01T14:22:24Z",
"options": null,
"remaining_runs": null,
"resource": "pulp:consumer:SimpleScheduledInstall_consumer",
"schedule": "PT1M",
"task": "pulp.server.tasks.consumer.install_content",
"total_run_count": 1,
"units": [
{
"type_id": "rpm",
"unit_key": "zebra"
}
]
}
]
The scheduler after it has been spawning failing tasks for 5 minutes
[root@ip-172-31-20-145 ~]# sleep 300; curl -k https://admin:admin@localhost/pulp/api/v2/consumers/SimpleScheduledInstall_consumer/schedules/content/install/ | python -mjson.tool
[
{
"_href": "/pulp/api/v2/consumers/SimpleScheduledInstall_consumer/schedules/content/install/556c6a642bcb1c455d5ce1ef/",
"_id": "556c6a642bcb1c455d5ce1ef",
"args": [
"SimpleScheduledInstall_consumer"
],
"consecutive_failures": 0,
"enabled": true,
"failure_threshold": 3,
"first_run": "2015-06-01T14:21:24Z",
"kwargs": {
"options": null,
"units": [
{
"type_id": "rpm",
"unit_key": "zebra"
}
]
},
"last_run_at": "2015-06-01T14:26:24Z",
"last_updated": 1433168484.7911601,
"next_run": "2015-06-01T14:27:24Z",
"options": null,
"remaining_runs": null,
"resource": "pulp:consumer:SimpleScheduledInstall_consumer",
"schedule": "PT1M",
"task": "pulp.server.tasks.consumer.install_content",
"total_run_count": 6,
"units": [
{
"type_id": "rpm",
"unit_key": "zebra"
}
]
}
]
Fetching the failing tasks from Pulp
[root@ip-172-31-20-145 ~]# curl -k https://admin:admin@localhost/pulp/api/v2/tasks/ | python -mjson.tool > all_tasks.json
Showing single failing task
{
"_href": "/pulp/api/v2/tasks/74a5846b-3803-4651-b1ff-08499f1b2400/",
"_id": {
"$oid": "556c6bcc2bcb1c236e803949"
},
"_ns": "task_status",
"error": null,
"exception": null,
"finish_time": "2015-06-01T14:27:24Z",
"id": "556c6bcc2bcb1c236e803949",
"progress_report": null,
"result": null,
"spawned_tasks": [],
"start_time": "2015-06-01T14:27:24Z",
"state": "error",
"tags": [
"pulp:consumer:SimpleScheduledInstall_consumer",
"pulp:action:unit_install"
],
"task_id": "74a5846b-3803-4651-b1ff-08499f1b2400",
"task_type": null,
"traceback": "Traceback (most recent call last):\n File \"/home/mkovacik/src/pulp-automation/pulp_auto/agent.py\", line 137, in __call__\n response = self.request_to_call(self.module, request, self.PROFILE)()\n File \"/home/mkovacik/src/pulp-automation/pulp_auto/agent.py\", line 118, in <lambda>\n **dict(list(request['kws'].viewitems()) + [('PROFILE', PROFILE)])\n File \"/home/mkovacik/src/pulp-automation/pulp_auto/handler/handler.py\", line 19, in logged_wrapper\n ret = method(self, *args, **kvs)\n File \"/home/mkovacik/src/pulp-automation/pulp_auto/handler/handler.py\", line 47, in wrapped_method\n units\n File \"/home/mkovacik/src/pulp-automation/pulp_auto/handler/handler.py\", line 45, in <lambda>\n PROFILE\n File \"/home/mkovacik/src/pulp-automation/pulp_auto/handler/content_handler.py\", line 29, in install\n unit_type.store(unit, PROFILE)\n File \"/home/mkovacik/src/pulp-automation/pulp_auto/handler/rpm_unit.py\", line 48, in store\n if Rpm.list(unit, PROFILE) == []:\n File \"/home/mkovacik/src/pulp-automation/pulp_auto/handler/rpm_unit.py\", line 36, in list\n nevra = Rpm.unit_to_nevra(unit)\n File \"/home/mkovacik/src/pulp-automation/pulp_auto/handler/rpm_unit.py\", line 9, in unit_to_nevra\n assert type(request.unit_key) is Namespace, 'unsupported unit key type: %s' % type(request.unit_key)\nAssertionError: unsupported unit key type: <type 'unicode'>\n",
"worker_name": "agent"
}
Counting number of failing tasks
[root@ip-172-31-20-145 ~]# csplit all_tasks.json '%2015-06-01T14:22:02Z%'
[root@ip-172-31-20-145 ~]# grep 'line 9, in unit_to_nevra' xx00 | wc -l
7
[root@ip-172-31-20-145 ~]#