|
|
@@ -0,0 +1,144 @@
|
|
|
+import boto3
|
|
|
+import click
|
|
|
+import json
|
|
|
+
|
|
|
+from datetime import datetime
|
|
|
+from dynaconf import settings
|
|
|
+from sqlalchemy import create_engine, Table, MetaData, Column, Float, String, \
|
|
|
+ Integer, UniqueConstraint
|
|
|
+from sqlalchemy.sql import insert, select
|
|
|
+
|
|
|
+
|
|
|
# Engine shared by every caller in this module; built on first use.
__ENGINE = None


def _get_engine():
    """
    Return the module-wide sqlalchemy engine.

    The engine is created from the ``database_uri`` setting the first time
    this function is called and the same object is handed back afterwards.
    """
    global __ENGINE
    if __ENGINE is not None:
        return __ENGINE
    __ENGINE = create_engine(settings.get('database_uri'))
    return __ENGINE
|
|
|
+
|
|
|
+
|
|
|
def _initialize_db():
    """
    Declare the ``openaq_measurements`` table and create it through the
    shared engine.

    ``message_id`` carries a unique constraint named ``unique_msgid``.
    """
    engine = _get_engine()
    metadata = MetaData()
    columns = [
        Column('id', Integer, primary_key=True),
        Column('message_id', String),
        Column('time_received', Integer),
        Column('time_measurement', Integer),
        Column('location', String),
        Column('averaging_hours', Float),
        Column('parameter', String),
        Column('value_ug_m3', Float),
    ]
    # Registering the Table on ``metadata`` is what makes create_all emit it.
    Table(
        'openaq_measurements',
        metadata,
        *columns,
        UniqueConstraint('message_id', name='unique_msgid'),
    )
    metadata.create_all(engine)
|
|
|
+
|
|
|
+
|
|
|
+def _get_averaging_hours(period):
|
|
|
+ """
|
|
|
+ Return the averaging period in hours
|
|
|
+ """
|
|
|
+ period_time = period['value']
|
|
|
+ period_unit = period['unit']
|
|
|
+ if period_unit == 'hours':
|
|
|
+ return period_time
|
|
|
+ raise ValueError(f'Invalid period unit encountered: {period_unit}')
|
|
|
+
|
|
|
+
|
|
|
+def _get_value(message):
|
|
|
+ """
|
|
|
+ Return the measurement value in message
|
|
|
+ """
|
|
|
+ measurement_unit = message['unit']
|
|
|
+ measurement = message['value']
|
|
|
+ if measurement_unit == "µg/m³":
|
|
|
+ return measurement
|
|
|
+ raise ValueError(f'Invalid measurement unit: {measurement_unit}')
|
|
|
+
|
|
|
+
|
|
|
+def _parse_date(strdate):
|
|
|
+ """
|
|
|
+ Parse date in ISO 8601 format to unix timestamp
|
|
|
+ """
|
|
|
+ return int(datetime.strptime(strdate, '%Y-%m-%dT%H:%M:%S.%fZ').timestamp())
|
|
|
+
|
|
|
+
|
|
|
def store(message):
    """
    Store the message in the database.

    ``message`` is the decoded queue payload: ``MessageId`` and ``Timestamp``
    are read from it directly, and its ``Message`` entry is a JSON string
    holding the measurement itself. One row is inserted into
    ``openaq_measurements``.
    """
    engine = _get_engine()
    # The table definition is reflected from the database on every call.
    measurements = Table(
        'openaq_measurements',
        MetaData(),
        autoload=True,
        autoload_with=engine,
    )
    insert_stmt = insert(measurements)
    content = json.loads(message['Message'])
    row = {
        'message_id': message['MessageId'],
        'time_received': _parse_date(message['Timestamp']),
        'time_measurement': _parse_date(content['date']['utc']),
        'location': content['location'],
        'averaging_hours': _get_averaging_hours(content['averagingPeriod']),
        'parameter': content['parameter'],
        'value_ug_m3': _get_value(content),
    }
    engine.execute(insert_stmt.values(row))
|
|
|
+
|
|
|
+
|
|
|
def get(message_id):
    """
    Get the message with id message_id.

    Returns every row of ``openaq_measurements`` whose ``message_id`` column
    equals ``message_id``, as returned by ``fetchall()``.
    """
    engine = _get_engine()
    # The table definition is reflected from the database on every call.
    measurements = Table(
        'openaq_measurements',
        MetaData(),
        autoload=True,
        autoload_with=engine,
    )
    matches_id = measurements.c.message_id == message_id
    query = select([measurements]).where(matches_id)
    result = engine.execute(query)
    return result.fetchall()
|
|
|
+
|
|
|
+
|
|
|
def process_queue(amount=10, delete=False):
    """
    Process messages in the queue

    Receives messages from the ``openaq`` SQS queue in ``eu-west-1`` and
    stores every message whose id is not in the database yet. Processing
    stops once ``amount`` messages have been handled or a receive call
    comes back empty.

    :param amount: maximum number of messages to handle.
    :param delete: when true, delete each handled message from the queue,
        including messages that were already present in the database.
    """
    # A single receive call is asked for at most this many messages.
    max_batch_size = 10

    # Get the service resource
    sqs = boto3.resource('sqs', region_name='eu-west-1')

    # Get the queue
    queue = sqs.get_queue_by_name(QueueName='openaq')

    processed = 0
    while processed < amount:
        # Ask only for what is still needed so ``amount`` is honoured
        # exactly instead of being rounded up to a multiple of 10.
        batch_size = min(max_batch_size, amount - processed)
        msgs = queue.receive_messages(MaxNumberOfMessages=batch_size)
        if not msgs:
            # An empty receive is treated as "nothing left to process".
            break
        for msg in msgs:
            data = json.loads(msg.body)
            print(f'Treating message {data["MessageId"]}')
            if not get(data['MessageId']):
                store(data)
            if delete:
                msg.delete()
        # Count what was actually received; a batch may hold fewer
        # messages than requested.
        processed += len(msgs)
|
|
|
+
|
|
|
+
|
|
|
@click.command()
@click.option(
    '--amount',
    default=10,
    help='Amount of messages to fetch.',
)
@click.option(
    '--delete',
    default=False,
    type=click.BOOL,
    help='Delete messages from the queue after successful storage.',
)
def run_process_queue(amount, delete):
    # Command line entry point: forwards the parsed options to process_queue.
    process_queue(amount=amount, delete=delete)
|
|
|
+
|
|
|
+
|
|
|
@click.command()
def init_db():
    # Command line entry point: creates the database table via
    # _initialize_db.
    _initialize_db()
|