Project

Profile

Help

Task #2449 » consumer_applicability_stats.py

To get applicability related stats from db - ttereshc, 07/04/2017 12:14 AM

 
import logging
import logging.handlers

from pulp.server.db import connection

# Module logger: INFO and above is sent to the local syslog socket at /dev/log.
_logger = logging.getLogger(__name__)
_logger.setLevel(logging.INFO)
_logger.addHandler(logging.handlers.SysLogHandler(address='/dev/log'))

# Open the Pulp database connection at import time; initialize() must run
# before get_database() is called.
connection.initialize()
db = connection.get_database()

# Collection handles shared by the stats functions below ("_c" = collection).
consumers_c = db['consumers']
bindings_c = db['consumer_bindings']
profiles_c = db['consumer_unit_profiles']
repos_c = db['repos']
rpa_c = db['repo_profile_applicability']  # rpa = repo profile applicability
rcu_c = db['repo_content_units']  # rcu = repo content units


def get_consumer_stats():
    """Log consumer, consumer-profile and consumer-binding statistics.

    Reads the ``consumers``, ``consumer_unit_profiles`` and
    ``consumer_bindings`` collections through the module-level handles and
    writes one INFO line per statistic to ``_logger``.  Returns nothing.

    The min/avg/max lines report ``n/a`` when there is nothing to summarise,
    so an empty database no longer raises ``ValueError``/``ZeroDivisionError``.
    """

    def summarize(counts):
        """Return 'min/avg/max' for *counts*, or 'n/a' when it is empty."""
        if not counts:
            return 'n/a'
        # Plain `/`: floors for ints on Python 2, true division on Python 3.
        return '%s/%s/%s' % (min(counts), sum(counts) / len(counts), max(counts))

    consumers_with_profile = set(profiles_c.distinct('consumer_id'))
    known_consumers = set(consumers_c.distinct('id'))
    consumers_without_profile = known_consumers - consumers_with_profile

    # One representative document per distinct profile hash is enough to
    # learn which packages that profile contains.
    profile_hashes = profiles_c.distinct('profile_hash')
    packages_per_profile = []
    unique_packages = set()
    for profile_hash in profile_hashes:
        document = profiles_c.find_one({'profile_hash': profile_hash}, projection=['profile'])
        if document is None:
            # The profile was removed between distinct() and this lookup.
            continue
        packages = document['profile']
        packages_per_profile.append(len(packages))
        # Key each package by its sorted (field, value) pairs so the same
        # package is counted once regardless of the stored dict's key order.
        unique_packages.update(tuple(sorted(pkg.items())) for pkg in packages)

    # Only consumers that have at least one binding contribute to the stats.
    bindings_per_consumer = []
    for consumer in consumers_c.find(projection=['id']):
        binding_count = bindings_c.find({'consumer_id': consumer['id']}).count()
        if binding_count:
            bindings_per_consumer.append(binding_count)

    _logger.info('=== Consumers ===')
    _logger.info('Consumers: %s', consumers_c.count())
    _logger.info('Consumers with profile: %s', len(consumers_with_profile))
    _logger.info('Consumers without profile: %s', len(consumers_without_profile))
    _logger.info('Consumer profiles, total: %s', len(profile_hashes))
    _logger.info('Consumer profiles, min/avg/max num of packages: %s',
                 summarize(packages_per_profile))
    _logger.info('Consumer profiles, total unique packages: %s', len(unique_packages))
    _logger.info('Consumers with bindings, total: %s', len(bindings_per_consumer))
    _logger.info('Consumer bindings, total: %s', bindings_c.count())
    _logger.info('Consumer bindings, min/avg/max per consumer: %s',
                 summarize(bindings_per_consumer))


def get_repository_stats():
    """Log RPM and erratum counts for repositories seen in applicability data.

    For every distinct ``repo_id`` in ``repo_profile_applicability`` the
    number of ``rpm`` and ``erratum`` units in ``repo_content_units`` is
    counted, and the total plus min/avg/max of those counts are written to
    ``_logger`` at INFO level.  Returns nothing.

    The min/avg/max lines report ``n/a`` when no repository is found, so an
    empty applicability collection no longer raises
    ``ValueError``/``ZeroDivisionError``.
    """

    def summarize(counts):
        """Return 'min/avg/max' for *counts*, or 'n/a' when it is empty."""
        if not counts:
            return 'n/a'
        # Plain `/`: floors for ints on Python 2, true division on Python 3.
        return '%s/%s/%s' % (min(counts), sum(counts) / len(counts), max(counts))

    def count_units(repo_id, unit_type_id):
        """Count units of one type associated with one repository."""
        return rcu_c.find({'repo_id': repo_id, 'unit_type_id': unit_type_id}).count()

    # NOTE(review): the ids come from the applicability collection, not from
    # 'repos', so "Repositories, total" below only covers repositories that
    # have applicability data -- confirm this is the intended population.
    repo_ids = rpa_c.distinct('repo_id')
    rpm_counts = [count_units(repo_id, 'rpm') for repo_id in repo_ids]
    erratum_counts = [count_units(repo_id, 'erratum') for repo_id in repo_ids]

    _logger.info('=== Repositories ===')
    _logger.info('Repositories, total: %s', len(repo_ids))
    _logger.info('Repositories, N of RPMs, min/avg/max: %s', summarize(rpm_counts))
    _logger.info('Repositories, N of errata, min/avg/max: %s', summarize(erratum_counts))


def get_applicability_stats():
    """Log how applicability data lines up with repositories and profiles.

    Compares the distinct ``repo_id`` and ``profile_hash`` values found in
    ``repo_profile_applicability`` with those in ``repos`` and
    ``consumer_unit_profiles`` and writes the resulting counts to ``_logger``
    at INFO level.  Returns nothing.

    Two different set differences are reported:

    * *not in / without applicability*: repositories or consumer profiles
      that exist but have no applicability document;
    * *orphaned*: applicability documents whose repository or consumer
      profile no longer exists.
    """
    rpa_repo_ids = set(rpa_c.distinct('repo_id'))
    known_repo_ids = set(repos_c.distinct('repo_id'))
    orphaned_repo_ids = rpa_repo_ids - known_repo_ids
    repos_without_applicability = known_repo_ids - rpa_repo_ids

    rpa_profile_hashes = set(rpa_c.distinct('profile_hash'))
    profile_hashes = set(profiles_c.distinct('profile_hash'))
    orphaned_profile_hashes = rpa_profile_hashes - profile_hashes
    profiles_without_applicability = profile_hashes - rpa_profile_hashes

    # `$in` needs a list: sets cannot be BSON-encoded.
    orphaned_by_repo = rpa_c.find({'repo_id': {'$in': list(orphaned_repo_ids)}})
    orphaned_by_profile = rpa_c.find({'profile_hash': {'$in': list(orphaned_profile_hashes)}})
    actual_applicability = rpa_c.find({'profile_hash': {'$in': list(profile_hashes)}})

    _logger.info('=== Applicability ===')
    _logger.info('Repositories, total: %s', len(known_repo_ids))
    # The "in" / "with" counts cover every distinct id referenced by an
    # applicability document, orphaned ones included.
    _logger.info('Repositories in applicability profiles: %s', len(rpa_repo_ids))
    _logger.info('Repositories not in applicability profiles: %s',
                 len(repos_without_applicability))
    _logger.info('Consumer profiles, total: %s', len(profile_hashes))
    _logger.info('Consumer profiles with applicability: %s', len(rpa_profile_hashes))
    _logger.info('Consumer profiles without applicability: %s',
                 len(profiles_without_applicability))
    _logger.info('Applicability profiles, total: %s', rpa_c.count())
    _logger.info('Actual applicability profiles: %s', actual_applicability.count())
    _logger.info('Orphaned applicability by repo: %s', orphaned_by_repo.count())
    _logger.info('Orphaned applicability by consumer profile: %s', orphaned_by_profile.count())


if __name__ == '__main__':
    # Run every report in order: consumers, repositories, applicability.
    for report in (get_consumer_stats, get_repository_stats, get_applicability_stats):
        report()
(1-1/2)