I want to run a cronjob to aggregate data from my users for every hour. Currently there’s quite a lot of data to aggregate and i’d ideally want to use python, and am currently running an express server.
I’m wondering – how would I run these scripts from express?
import psycopg2 as pg
import pandas as pd
from datetime import datetime, timedelta
import db_conn
def handler(thing, context):
"""
this connects, calculates the last hour, then writes a new line to a table in postgress.
"""
conn = db_conn.main()
#step one
current_hour_epoch, previous_hour_epoch = calculate_last_utc_hour()
#step two
df = query_sentiment_statistics(conn, previous_hour_epoch, current_hour_epoch)
#step three
write_db(conn, df, previous_hour_epoch, 1)
pass
def calculate_last_utc_hour():
"""
this is used for the cronjob processing.
"""
current_time = datetime.now()
current_hour_start = current_time.replace(minute=0, second=0, microsecond=0)
previous_hour_start = current_hour_start - timedelta(hours = 1)
current_hour_epoch = datetime.timestamp(current_hour_start)
previous_hour_epoch = datetime.timestamp(previous_hour_start)
print("query from ", previous_hour_epoch, "to", current_hour_epoch)
return current_hour_epoch, previous_hour_epoch
def query_sentiment_statistics(conn, lower, upper):
"""
Gathers sentiment data statistical information.
"""
sql = """
SELECT
user_id,
AVG(sentiment) as sentiment,
AVG(magnitude) as magnitude,
SUM(sentiment) as total_sentiment,
SUM(magnitude) as total_magnitude,
MAX(sentiment) as max_sentiment,
MIN(sentiment) as min_sentiment,
COUNT(user_id) as count
FROM
sentiments
WHERE
created
BETWEEN %s and %s
GROUP BY
user_id;
"""
try:
cur = conn.cursor()
df = pd.read_sql(sql, con=conn, params=(lower, upper))
cur.close()
return df
except (Exception, pg.DatabaseError) as error:
print(error)
pass
def write_db(conn, dataframe, lower, detail = 1):
# set the additional columns required to be written into the table
dataframe['detail'] = detail
dataframe['created'] = lower
try:
#writes to new table that aggregates sentiments
dataframe.to_sql('sentiments_aggregate', con = conn, if_exists = 'append', index = False)
except (Exception, pg.DatabaseError) as error:
print(error)
pass
if __name__ == '__main__':
pass
Would it just be easier to do the same code in javascript? something liek this,
const { db } = require("./db");
const sql = "SELECT user_id, AVG(sentiment) as sentiment, AVG(magnitude) as magnitude, SUM(sentiment) as total_sentiment, SUM(magnitude) as total_magnitude,MAX(sentiment) as max_sentiment, MIN(sentiment) as min_sentiment,COUNT(user_id) as count FROM sentiments WHERE created BETWEEN %s and %s GROUP BY user_id;"
db.query(sql, [id], (err, rows) => {
});
}
and using node-cron to replicate the time intervals?
thanks!