mirror of
https://gitlab.com/tildes/tildes.git
synced 2026-04-16 06:18:34 +02:00
Add group_stats table, track daily topics/comments
This adds a group_stats table and cronjob that will insert the previous day's stats into it each day just after 00:00 UTC.
This commit is contained in:
@@ -13,6 +13,13 @@ data-cleanup-cronjob:
|
||||
- hour: 4
|
||||
- minute: 10
|
||||
|
||||
# Runs the generate_stats script once a day at 00:10 so that the previous day's
# per-group stats get inserted into the group_stats table.
# NOTE(review): cron schedules use the server's local time zone, while the script
# always works on the previous UTC day - presumably the server clock is UTC, confirm.
generate-group-stats-for-yesterday-cronjob:
  cron.present:
    - name: {{ bin_dir }}/python -c "from scripts.generate_group_stats_for_yesterday import generate_stats; generate_stats('{{ app_dir }}/{{ pillar['ini_file'] }}')"
    - user: {{ app_username }}
    - hour: 0
    - minute: 10
|
||||
|
||||
generate-site-icons-css-cronjob:
|
||||
cron.present:
|
||||
- name: {{ bin_dir }}/python -c "from scripts.generate_site_icons_css import generate_css; generate_css()"
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
"""Add group_stats table
|
||||
|
||||
Revision ID: 9148909b78e9
|
||||
Revises: fe91222503ef
|
||||
Create Date: 2020-03-06 02:27:31.720325
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "9148909b78e9"
|
||||
down_revision = "fe91222503ef"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
    """Create the group_stats table and a GiST index on its period column."""
    # Postgres enum type listing the kinds of statistic that can be stored
    stat_type = postgresql.ENUM(
        "TOPICS_POSTED", "COMMENTS_POSTED", name="groupstattype"
    )

    columns = [
        sa.Column("group_id", sa.Integer(), nullable=False),
        sa.Column("stat", stat_type, nullable=False),
        sa.Column("period", postgresql.TSTZRANGE(), nullable=False),
        sa.Column("value", sa.Float(), nullable=False),
    ]
    constraints = [
        sa.ForeignKeyConstraint(
            ["group_id"],
            ["groups.group_id"],
            name=op.f("fk_group_stats_group_id_groups"),
        ),
        # a row is identified by its (group, stat type, time period) combination
        sa.PrimaryKeyConstraint(
            "group_id", "stat", "period", name=op.f("pk_group_stats")
        ),
    ]
    op.create_table("group_stats", *columns, *constraints)

    # GiST index on the range column, for range operators
    op.create_index(
        "ix_group_stats_period_gist",
        "group_stats",
        ["period"],
        unique=False,
        postgresql_using="gist",
    )
|
||||
|
||||
|
||||
def downgrade():
    """Drop the group_stats table, its GiST index, and the groupstattype enum."""
    op.drop_index("ix_group_stats_period_gist", table_name="group_stats")
    op.drop_table("group_stats")

    # Dropping the table does not remove the enum type that upgrade() created
    # along with it; if it were left behind, running upgrade() again would fail
    # because the type already exists.
    op.execute("DROP TYPE groupstattype")
|
||||
86
tildes/scripts/generate_group_stats_for_yesterday.py
Normal file
86
tildes/scripts/generate_group_stats_for_yesterday.py
Normal file
@@ -0,0 +1,86 @@
|
||||
# Copyright (c) 2020 Tildes contributors <code@tildes.net>
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
|
||||
"""Script for generating group statistics for yesterday (UTC).
|
||||
|
||||
This script is not very flexible - no matter what time it is run, it will always
|
||||
generate stats for the previous UTC day for all groups and store them in the group_stats
|
||||
table.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from tildes.enums import GroupStatType
|
||||
from tildes.lib.database import get_session_from_config
|
||||
from tildes.lib.datetime import utc_now
|
||||
from tildes.models.comment import Comment
|
||||
from tildes.models.group import Group, GroupStat
|
||||
from tildes.models.topic import Topic
|
||||
|
||||
|
||||
def generate_stats(config_path: str) -> None:
    """Generate all stats for all groups for yesterday (UTC).

    For every group, a topics-posted and a comments-posted stat covering the
    previous UTC day are added and committed together. A group whose stats for
    that period are already stored is skipped.

    Args:
        config_path: path of the config file used to set up the database session.
    """
    db_session = get_session_from_config(config_path)

    # the end time is the start of the current day, start time 1 day before that
    end_time = utc_now().replace(hour=0, minute=0, second=0, microsecond=0)
    start_time = end_time - timedelta(days=1)

    for group in db_session.query(Group).all():
        # autoflush is disabled so that the count queries don't flush the pending
        # stats early; any IntegrityError should only come out of the commit below
        with db_session.no_autoflush:
            db_session.add(topics_posted(db_session, group, start_time, end_time))
            db_session.add(comments_posted(db_session, group, start_time, end_time))

        try:
            db_session.commit()
        except IntegrityError:
            # stats have already run for this group/period combination: discard
            # the pending rows and explicitly reset the session so it is in a
            # clean state before moving on to the next group
            db_session.rollback()
|
||||
|
||||
|
||||
def topics_posted(
    db_session: Session, group: Group, start_time: datetime, end_time: datetime
) -> GroupStat:
    """Build the TOPICS_POSTED stat for the group between the start/end times.

    Counts topics in the group created at or after start_time and before
    end_time, leaving out any that are deleted or removed.
    """
    conditions = (
        Topic.group == group,
        Topic.created_time >= start_time,
        Topic.created_time < end_time,
        Topic.is_deleted == False,  # noqa
        Topic.is_removed == False,  # noqa
    )
    topic_count = db_session.query(Topic).filter(*conditions).count()

    return GroupStat(
        group, GroupStatType.TOPICS_POSTED, start_time, end_time, topic_count
    )
|
||||
|
||||
|
||||
def comments_posted(
    db_session: Session, group: Group, start_time: datetime, end_time: datetime
) -> GroupStat:
    """Build the COMMENTS_POSTED stat for the group between the start/end times.

    Counts comments created at or after start_time and before end_time whose
    topic is in the group, leaving out any comments that are deleted or removed.
    """
    conditions = (
        Topic.group == group,
        Comment.created_time >= start_time,
        Comment.created_time < end_time,
        Comment.is_deleted == False,  # noqa
        Comment.is_removed == False,  # noqa
    )
    # comments are tied to a group through their topic, hence the join
    comment_count = (
        db_session.query(Comment).join(Topic).filter(*conditions).count()
    )

    return GroupStat(
        group, GroupStatType.COMMENTS_POSTED, start_time, end_time, comment_count
    )
|
||||
@@ -13,7 +13,7 @@ from tildes.models.comment import (
|
||||
CommentVote,
|
||||
)
|
||||
from tildes.models.financials import Financials
|
||||
from tildes.models.group import Group, GroupSubscription
|
||||
from tildes.models.group import Group, GroupStat, GroupSubscription
|
||||
from tildes.models.log import Log
|
||||
from tildes.models.message import MessageConversation, MessageReply
|
||||
from tildes.models.scraper import ScraperResult
|
||||
|
||||
@@ -165,6 +165,13 @@ class FinancialEntryType(enum.Enum):
|
||||
INCOME = enum.auto()
|
||||
|
||||
|
||||
class GroupStatType(enum.Enum):
    """Enum for types of group statistics.

    The member names match the labels of the "groupstattype" database enum, so
    renaming a member needs a matching database migration.
    """

    # number of topics posted in the group during the period
    TOPICS_POSTED = enum.auto()
    # number of comments posted in the group during the period
    COMMENTS_POSTED = enum.auto()
|
||||
|
||||
|
||||
class LogEventType(enum.Enum):
|
||||
"""Enum for the types of events stored in logs."""
|
||||
|
||||
|
||||
@@ -2,5 +2,6 @@
|
||||
|
||||
from .group import Group
|
||||
from .group_query import GroupQuery
|
||||
from .group_stat import GroupStat
|
||||
from .group_subscription import GroupSubscription
|
||||
from .group_wiki_page import GroupWikiPage
|
||||
|
||||
54
tildes/tildes/models/group/group_stat.py
Normal file
54
tildes/tildes/models/group/group_stat.py
Normal file
@@ -0,0 +1,54 @@
|
||||
# Copyright (c) 2020 Tildes contributors <code@tildes.net>
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
|
||||
"""Contains the GroupStat class."""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Union
|
||||
|
||||
from psycopg2.extras import DateTimeTZRange
|
||||
from sqlalchemy import Column, Float, ForeignKey, Index, Integer
|
||||
from sqlalchemy.dialects.postgresql import ENUM, TSTZRANGE
|
||||
from sqlalchemy.orm import relationship
|
||||
|
||||
from tildes.enums import GroupStatType
|
||||
from tildes.models import DatabaseModel
|
||||
|
||||
from .group import Group
|
||||
|
||||
|
||||
class GroupStat(DatabaseModel):
    """A single statistic value for one group over one time period.

    A row is identified by the combination of its group, stat type, and period.
    """

    __tablename__ = "group_stats"

    group_id: int = Column(
        Integer, ForeignKey("groups.group_id"), nullable=False, primary_key=True
    )
    stat: GroupStatType = Column(ENUM(GroupStatType), nullable=False, primary_key=True)
    period: DateTimeTZRange = Column(TSTZRANGE, nullable=False, primary_key=True)
    value: float = Column(Float, nullable=False)

    group: Group = relationship("Group", innerjoin=True, lazy=False)

    # GiST index on the period column, for use by range operators
    __table_args__ = (
        Index("ix_group_stats_period_gist", period, postgresql_using="gist"),
    )

    def __init__(
        self,
        group: Group,
        stat: GroupStatType,
        start_time: datetime,
        end_time: datetime,
        value: Union[int, float],
    ):
        """Create a new statistic for the group and time period.

        The stored period includes start_time itself but stops just short of
        end_time. The value is always stored as a float, even when given an int.
        """
        # "[)" bounds: inclusive lower bound, exclusive upper bound
        time_range = DateTimeTZRange(start_time, end_time, bounds="[)")

        self.group = group
        self.stat = stat
        self.period = time_range
        self.value = float(value)
|
||||
Reference in New Issue
Block a user