관리-도구
편집 파일: v1_db_migrator.py
# coding=utf-8 # # Copyright © Cloud Linux GmbH & Cloud Linux Software, Inc 2010-2019 All Rights Reserved # # Licensed under CLOUD LINUX LICENSE AGREEMENT # http://cloudlinux.com/docs/LICENSE.TXT import logging import os import time from datetime import datetime, timedelta import sqlalchemy from sqlalchemy import Column, Float, Integer, String, func, insert from sqlalchemy.exc import DatabaseError, SQLAlchemyError from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker from lvestats.core.plugin import LveStatsPlugin, LveStatsPluginTerminated from lvestats.lib import uidconverter from lvestats.lib.commons.dateutil import gm_datetime_to_unixtimestamp from lvestats.lib.commons.func import get_chunks from lvestats.lib.dbengine import fix_lost_keep_alive, validate_database from lvestats.orm.history import history, history_x60 from lvestats.orm.history_gov import history_gov STATE_FILE = '/var/lve/v1_migration_last.ts' V2_KEYS = [ 'id', 'mem', 'mem_limit', 'mem_fault', 'memphy', 'lmemphy', 'memphy_fault', 'mep', 'mep_limit', 'mep_fault', 'nproc', 'lnproc', 'nproc_fault', 'iops', 'liops', ] V2_GOV_KEYS = [ 'username', 'sum_cpu', 'sum_write', 'sum_read', 'limit_cpu_on_period_end', 'limit_read_on_period_end', 'limit_write_on_period_end', 'cause_of_restrict', ] V1Base = declarative_base() class V1HistoryGov(V1Base): """ Mapping out v1 gov history table """ __tablename__ = 'history_gov' ts = Column('ts', Integer, primary_key=True) username = Column('username', String(64), primary_key=True) sum_cpu = Column('sum_cpu', Float) sum_write = Column('sum_write', Float) sum_read = Column('sum_read', Float) limit_cpu_on_period_end = Column('limit_cpu_on_period_end', Integer) limit_read_on_period_end = Column('limit_read_on_period_end', Integer) limit_write_on_period_end = Column('limit_write_on_period_end', Integer) cause_of_restrict = Column('cause_of_restrict', Integer) server_id = Column('server_id', String(10), primary_key=True) weight = Column('weight', Integer) class V1History(V1Base): """ Mapping out v1 history table """ __tablename__ = 'history' id = Column('id', Integer, primary_key=True) cpu = Column('cpu', Integer) cpu_limit = Column('cpu_limit', Integer) cpu_max = Column('cpu_max', Integer) ncpu = Column('ncpu', Integer) mep = Column('mep', Integer) mep_limit = Column('mep_limit', Integer) mep_max = Column('mep_max', Integer) io = Column('io', Integer) io_max = Column('io_max', Integer) io_limit = Column('io_limit', Integer) mem = Column('mem', Integer) mem_limit = Column('mem_limit', Integer) mem_max = Column('mem_max', Integer) mem_fault = Column('mem_fault', Integer) mep_fault = Column('mep_fault', Integer) created = Column('created', sqlalchemy.types.DateTime, primary_key=True) weight = Column('weight', Integer) server_id = Column('server_id', String(10)) lmemphy = Column('lmemphy', Integer) memphy = Column('memphy', Integer) memphy_max = Column('memphy_max', Integer) memphy_fault = Column('memphy_fault', Integer) lnproc = Column('lnproc', Integer) nproc = Column('nproc', Integer) nproc_max = Column('nproc_max', Integer) nproc_fault = Column('nproc_fault', Integer) iops = Column('iops', Integer) iops_max = Column('iops_max', Integer) liops = Column('liops', Integer) class V1TimeInterval(object): """ The way it would work - on first run, the /var/lve/v1_migration_last.ts will be non-existant, and we will use latest timestamp from V1 db as the 'starting point' After that on each call of get_data we will use that 'starting point' to get_period from start point to 1 hour before. As soon as our start point is > 30 days old -- we will return as part of get_period third parameter true which means that ok, the rest of data is too old, lets move on. V1DBMigrator will convert data for that period, and then will call save_state(from) -- this will be new starting point for the next plugin run. We will store it in a property (last_ts), and save it to the file. So, that even if software restarted, we don't just ignore it. """ def __init__(self, v1session, ts_file=STATE_FILE, server_id='localhost'): self.ts_file = ts_file self.server_id = server_id self.last_ts = None self.last_uid = -1 self.v1session = v1session self.read_state() def save_ts_to_file(self, ts, uid=None): with open(self.ts_file, 'w', encoding='utf-8') as f: f.write(ts.strftime(self.get_ts_format())) self.last_ts = ts if uid is not None: f.write('\n' + str(uid)) self.last_uid = uid or -1 f.close() @staticmethod def get_ts_format(): return "%Y-%m-%d %H:%M:%S.%f" def save_timestamp(self, ts): self._save_state(ts) def save_uid(self, uid=None): self._save_state(self.last_ts, uid) def _save_state(self, ts, uid=None): try: self.save_ts_to_file(ts, uid) except IOError as e: logging.getLogger('plugin.V1DBMigrator.TimeInterval').error("Unable to save v1 migration TS %s", str(e)) def _read_state(self): ts = None try: with open(self.ts_file, 'r', encoding='utf-8') as f: ts = datetime.strptime(f.readline().rstrip(), self.get_ts_format()) uid = int(f.readline().rstrip() or -1) return ts, uid except IOError: return ts, -1 except ValueError as e: logging.getLogger('plugin.V1DBMigrator.TimeInterval').warning( "Unable to read %s (%s)", self.ts_file, e, ) return ts, -1 def read_state(self): self.last_ts, self.last_uid = self._read_state() if self.last_ts is None: res = ( self.v1session.query(func.max(V1History.created)) .filter(V1History.server_id == self.server_id).first() ) # set very old datetime if no rows in database last_ts_from_db = res[0] or datetime(1, 1, 1) self.last_ts = last_ts_from_db + timedelta(microseconds=1) def _to_ts(self): self.read_state() return self.last_ts - timedelta(microseconds=1) def is_too_old(self): return datetime.now() - timedelta(days=30) > self._to_ts() def get_uid(self): self.read_state() return self.last_uid def convert_username_to_uid(self, username): pass def _get_history_gov_users(self): from_ts, to_ts = self.get_period() from_ts_ = gm_datetime_to_unixtimestamp(from_ts) to_ts_ = gm_datetime_to_unixtimestamp(to_ts) usernames_ = ( self.v1session.query(V1HistoryGov) .filter(V1HistoryGov.ts.between(from_ts_, to_ts_), V1HistoryGov.server_id == self.server_id) .distinct(V1HistoryGov.username) .group_by(V1HistoryGov.username) ) return [item.username for item in usernames_] def _get_history_uids(self): from_ts, to_ts = self.get_period() uids_ = ( self.v1session.query(V1History) .filter( V1History.created.between(from_ts, to_ts), V1History.server_id == self.server_id, V1History.id > self.last_uid, ) .distinct(V1History.id) .group_by(V1History.id) ) return [item.id for item in uids_] def get_uids(self): uids_list = self._get_history_uids() for username in self._get_history_gov_users(): uid = self.convert_username_to_uid(username) if uid is not None and uid > self.last_uid and uid not in uids_list: uids_list.append(uid) return sorted(uids_list) def get_period(self): """We want to go 1 hour at a time, up to 1 month back, starting from now""" to_ts = self._to_ts() from_ts = self.last_ts - timedelta(hours=1) return from_ts, to_ts class Break(Exception): pass class V1DBMigrator(LveStatsPlugin): PLUGIN_LOCATION = '/usr/share/lve-stats/plugins/v1_db_migrator.py' timeout = 18 # change default timeout is_done = False period = 60 # every minute order = 9500 # We pretty much want to be last one standing v1_connect_string = None V1Session = None # We will need it to create session on each execution time_interval = None debug = True skip_on_error = True # What if we cannot save data for some reason, if True, skip it v2_server_id = 'localhost' v1_server_id = 'localhost' def __init__(self): self.log = logging.getLogger('plugin.V1DBMigrator') self._username_to_uid_cache = {} self._no_such_uid_cache = [] self._procs = 1 self.now = 0 # This changes in MainLoop self.log.info("V1 Migration Started") self._time_commit = self.timeout * 0.5 # time limit for stopping plugin self.control_time = True self._conn = None self._database_does_not_exist = False def set_config(self, config): self.v1_server_id = config.get('v1_server_id', 'localhost') self.v2_server_id = config.get('server_id', 'localhost') self.v1_connect_string = config.get('v1_connect_string') self.debug = config.get('debug', 'F').lower() in ('t', 'y', 'true', 'yes', 1) self.init_v1_db() def init_v1_db(self, ts=STATE_FILE): if self.v1_connect_string is None: self._database_does_not_exist = True return # check present sqlite database sqlite = 'sqlite:///' if self.v1_connect_string.startswith(sqlite) and not os.path.exists(self.v1_connect_string[len(sqlite):]): self.log.warning('Database "%s" does not exist', self.v1_connect_string) self._database_does_not_exist = True return # create database engine try: v1_db_engine = sqlalchemy.engine.create_engine(self.v1_connect_string, echo=self.debug) except SQLAlchemyError as e: self.log.warning(str(e)) self._database_does_not_exist = True return # check present history table if not v1_db_engine.dialect.has_table(v1_db_engine, V1History.__tablename__): self.log.warning( 'Table "%s" in database "%s" does not exist', V1History.__tablename__, self.v1_connect_string, ) self._database_does_not_exist = True return result = validate_database(v1_db_engine, hide_logging=True, base=V1Base) if result['column_error'] or result['table_error']: self.log.warning('V1 database malformed, migration skipped.') self._database_does_not_exist = True return self.V1Session = sessionmaker(bind=v1_db_engine) self.time_interval = V1TimeInterval(self.get_v1_session(), ts, self.v1_server_id) self.time_interval.convert_username_to_uid = self.convert_username_to_uid def get_v1_session(self): return self.V1Session() def execute(self, lve_data): self._procs = lve_data.get('procs', 1) if self.is_done: # all data had been migrated return if self._database_does_not_exist or self.time_interval.is_too_old(): self.log.warning("V1 Migration Done") self.cleanup() self.fix_lost_keep_alive_records() else: self.convert_all() def fix_lost_keep_alive_records(self): session = sessionmaker(bind=self.engine)() fix_lost_keep_alive(session, server_id=self.v2_server_id, log_=self.log) session.close() def cleanup(self): """ There is not much to do on clean up. Lets just set flag done = True, and remove plugin so that on next restart it would't be running any more :return: """ self.is_done = True try: os.remove(V1DBMigrator.PLUGIN_LOCATION) # remove compiled python code os.remove(V1DBMigrator.PLUGIN_LOCATION + 'c') except (IOError, OSError) as e: self.log.error("Unable to remove %s: %s", V1DBMigrator.PLUGIN_LOCATION, str(e)) session = sessionmaker(bind=self.engine)() try: session.query(history_x60).filter(history_x60.server_id == self.v2_server_id).delete() session.commit() except SQLAlchemyError: session.rollback() def get_v1_gov_data(self, from_ts, to_ts, username): from_ts_ = gm_datetime_to_unixtimestamp(from_ts) to_ts_ = gm_datetime_to_unixtimestamp(to_ts) return ( self.get_v1_session() .query(V1HistoryGov) .filter( V1HistoryGov.ts.between(from_ts_, to_ts_), V1HistoryGov.username == username, V1HistoryGov.server_id == self.v1_server_id, ) .all() ) def get_v1_data(self, from_ts, to_ts, uid): return ( self.get_v1_session() .query(V1History) .filter( V1History.created.between(from_ts, to_ts), V1History.server_id == self.v1_server_id, V1History.id == uid ) .order_by(V1History.id) .all() ) def _convert_data(self, from_ts, to_ts, uid, trans): username = self.convert_uid_to_username(uid) try: v2_rows_insert_list = [] for row in self.get_v1_data(from_ts, to_ts, uid): v2_rows = self.convert_row(row, self._procs) v2_rows_insert_list.extend(v2_rows) if v2_rows_insert_list: for chunk in get_chunks(v2_rows_insert_list): self._conn.execute(insert(history), chunk) v2_gov_rows_insert_list = [] if username and username != 'root': # ignore uid 0 (root) for row in self.get_v1_gov_data(from_ts, to_ts, username): v2_gov_rows = self.convert_gov_row(row) v2_gov_rows_insert_list.extend(v2_gov_rows) if v2_gov_rows_insert_list: for chunk in get_chunks(v2_gov_rows_insert_list): self._conn.execute(insert(history_gov), chunk) except (SQLAlchemyError, DatabaseError) as e: trans.rollback() self.log.warning('Can not save data to database: %s', str(e)) if not self.skip_on_error: raise e except LveStatsPluginTerminated as e: trans.commit() self.log.debug("Plugin is terminated.") raise Break() from e def _work_time(self): return time.time() - self.now # calculate plugin working time def _need_break(self): return self.timeout - self._work_time() < self._time_commit * 1.2 def convert_data(self, from_ts, to_ts): self.log.debug('Start converting from %s to %s', from_ts, to_ts) uids = self.time_interval.get_uids() # obtain uids need convert if not uids: return trans = self._conn.begin() for uid in uids: self._convert_data(from_ts, to_ts, uid, trans) self.time_interval.save_uid(uid) self.log.debug( 'Converted from %s to %s uid: %s; plugin work time %s', from_ts, to_ts, uid, self._work_time(), ) # control plugin work time if self.control_time and self._need_break(): self.log.debug( 'Stop converting; plugin work time %s', self._work_time(), ) raise Break() if trans.is_active: trans.commit() def convert_all(self): with self.engine.begin() as self._conn: try: while not self._need_break() and not self.time_interval.is_too_old(): from_ts, to_ts = self.time_interval.get_period() self.convert_data(from_ts, to_ts) self.time_interval.save_timestamp(from_ts) # save timestamp if not breacke cycle only except Break: # for break all cycles pass time_start = time.time() commit_time = time.time() - time_start self._time_commit = max(self._time_commit, commit_time) self.log.debug('Commit time %s', commit_time) @staticmethod def fault_count(limit, _max): if limit == _max: return 1 else: return 0 @staticmethod def convert_iops_faults(v1_row, v2_row): # v1 & v2 store IOPS the same way, but faults are not tracked in v1 v2_row['iops_fault'] = V1DBMigrator.fault_count(v1_row.liops, v1_row.iops_max) @staticmethod def convert_io(v1_row, v2_row): # v1 stores IO in KB/s, v2 in B/s v2_row['io'] = v1_row.io * 1024 v2_row['io_limit'] = v1_row.io_limit * 1024 v2_row['io_fault'] = V1DBMigrator.fault_count(v1_row.io_limit, v1_row.io_max) @staticmethod def convert_cpu_(procs, cpu, cpu_limit, cpu_max, ncpu): """ v1 holds CPU relative to total cores, where on 4 core system 1 core is 25% it also limits by ncpu (whatever is less), so on 4 cores system 2 ncpu and 30% is 30% of all cores (as 2ncpu = 50%, and we take smaller), and 2 ncpu and 70% is 50%, as 2ncpu = 50% / we take smaller To switch to new limit, we need to talke old limit and multiply it by 100 So 25% on 4 core system in v1 (1 core), is 25 * 4 * 100 = 10,000 """ v2_cpu_limit = min(100 * cpu_limit * procs, ncpu * 100 * 100) # no matter what mistake we make, lets not ever set CPU usage > CPU limit v2_cpu = min(v2_cpu_limit, cpu * procs * 100) # if cpu_limit == cpu_max, lets consider it to be a fault, note we loose precision # anyway, so if weight was 60, we will add 60 faults... oh well. v2_cpu_faults = V1DBMigrator.fault_count(v2_cpu_limit, 100 * cpu_max * procs) return v2_cpu, v2_cpu_limit, v2_cpu_faults def convert_cpu(self, row, v2_row, procs): v2_row['cpu'], v2_row['cpu_limit'], v2_row['cpu_fault'] = self.convert_cpu_( procs, row.cpu, row.cpu_limit, row.cpu_max, row.ncpu ) def convert_username_to_uid(self, username): if username in self._username_to_uid_cache: return self._username_to_uid_cache[username] uid = uidconverter.username_to_uid_local(username) self._username_to_uid_cache[username] = uid if uid is None: self.log.warning('Can not find uid for user %s', username) return uid def convert_uid_to_username(self, uid): if uid in self._no_such_uid_cache: return for username_, uid_ in self._username_to_uid_cache.items(): if uid == uid_: return username_ username_ = uidconverter.uid_to_username_local(uid) if username_ is None: self._no_such_uid_cache.append(uid) self.log.warning('Can not find user name for uid %s', uid) else: self._username_to_uid_cache[username_] = uid return username_ def convert_gov_row(self, row): to_ts = row.ts result = [] for i in range(0, row.weight): v2_gov_row = {'server_id': self.v2_server_id, 'ts': to_ts - 60 * i} for key in V2_GOV_KEYS: v2_gov_row[key] = getattr(row, key) uid = self.convert_username_to_uid(v2_gov_row.pop('username')) if uid: v2_gov_row['uid'] = uid result.append(v2_gov_row) return result def convert_row(self, row, procs): to_ts = gm_datetime_to_unixtimestamp(row.created) result = [] for i in range(0, row.weight): v2_row = {'server_id': self.v2_server_id, 'created': to_ts - 60 * i} for key in V2_KEYS: v2_row[key] = getattr(row, key) self.convert_cpu(row, v2_row, procs) self.convert_io(row, v2_row) self.convert_iops_faults(row, v2_row) result.append(v2_row) return result