From 420c00e3b5b07fc96ab2b8f3d65dc427530af6ed Mon Sep 17 00:00:00 2001 From: Jonathan Kolyer Date: Mon, 16 Mar 2015 06:03:36 +0000 Subject: [PATCH] VRFS-2916 fixed monthly queries --- admin/app/admin/cohort_data.rb | 8 ++ admin/app/models/cohort.rb | 193 ++++++++++++++++++--------------- db/up/cohorts.sql | 14 ++- 3 files changed, 126 insertions(+), 89 deletions(-) create mode 100644 admin/app/admin/cohort_data.rb diff --git a/admin/app/admin/cohort_data.rb b/admin/app/admin/cohort_data.rb new file mode 100644 index 000000000..25a4a7303 --- /dev/null +++ b/admin/app/admin/cohort_data.rb @@ -0,0 +1,8 @@ +ActiveAdmin.register Cohort, :as => 'Cohort Data' do + + menu :label => 'Cohorts', :parent => 'Analytics' + + index do + + end +end diff --git a/admin/app/models/cohort.rb b/admin/app/models/cohort.rb index a4fb6030f..84bb23dde 100644 --- a/admin/app/models/cohort.rb +++ b/admin/app/models/cohort.rb @@ -52,11 +52,10 @@ class Cohort < ActiveRecord::Base .map { |d| [d.year,d.mon,d.day].first(prec) } end - def self.cohort_date_ranges(starting=nil, ending=nil) + def self.cohort_group_ranges(starting=nil, ending=nil) starting ||= User.where(admin: false).order(:created_at).first.created_at ending ||= Time.now - dates = self.date_tuples([starting.year, starting.month], - [ending.year, ending.month]) + dates = self.date_tuples([starting.year, starting.month], [ending.year, ending.month]) ranges = [] dates.each_with_index do |d1, idx| d2 = dates[idx+1] || [Time.now.next_month.year,Time.now.next_month.month] @@ -66,63 +65,56 @@ class Cohort < ActiveRecord::Base ranges end - def self.monthly_cohorts(start_date, end_date) - self.cohort_date_ranges(start_date, end_date).collect do |range| - unless cc = Cohort.where(start_date: range.first).where(cumulative: true).limit(1).first + def self.generate_monthly_cohorts(monthly_start, monthly_end) + Cohort.delete_all(['all_time = ?',false]) + self.cohort_group_ranges.collect do |range| + next if range.first > monthly_start + cc = Cohort.new + cc.group_start = range.first + cc.group_end = range.last + cc.monthly_start = monthly_start + cc.monthly_end = monthly_end + cc.all_time = false + cc.save! + cc + end + end + + def self.generate_all_time_cohorts + self.cohort_group_ranges.collect do |range| + unless cc = Cohort.where(group_start: range.first).where(all_time: true).limit(1).first cc = Cohort.new - cc.start_date = range.first - cc.end_date = range.last + cc.group_start = range.first + cc.group_end = range.last + cc.all_time = true cc.save! end cc end end - def self.cumulative_cohorts - self.cohort_date_ranges.collect do |range| - unless cc = Cohort.where(start_date: range.first).where(cumulative: true).limit(1).first - cc = Cohort.new - cc.start_date = range.first - cc.end_date = range.last - cc.cumulative = true - cc.save! - end - cc - end - end - - def _join_user_all_time(assoc_ref) - assoc_ref.active_record - .joins("INNER JOIN users AS uu ON uu.id = #{assoc_ref.foreign_key}") - .where(created_at: self.start_date..self.end_date) - .where(['uu.created_at >= ? AND uu.created_at <= ?', - self.start_date, self.end_date]) - end - - def _attribute_within_monthly(active_record, attrib) - active_record.where(attrib => self.start_date..self.end_date) - end - def _put_data_set(key, count, num_user) - self.data_set[key] = count + self.data_set[key.to_s] = count self.data_set["#{key}%"] = 100.0 * (count.to_f / num_user.to_f) end - def self.user_attribute_within(attrib, cohort) - User.where(attrib => cohort.start_date..cohort.end_date) + def self.cohort_users(cohort) + User.where(created_at: cohort.group_start..cohort.group_end) end - def _monthly_played_online_count(constraint) + def _played_online_subquery(constraint) where = if constraint.is_a?(Range) "played.cnt >= #{constraint.first} AND played.cnt <= #{constraint.last}" else - "played.cnt = #{constraint}" + "played.cnt #{constraint}" end + start_date = all_time ? self.group_start : self.monthly_start + end_date = all_time ? self.group_end : self.monthly_end sql =<= '#{self.start_date}' AND msuh1.created_at <= '#{self.end_date}' AND + msuh1.created_at >= '#{start_date}' AND msuh1.created_at <= '#{end_date}' AND EXTRACT(EPOCH FROM (msuh1.session_removed_at - msuh1.created_at)) >= 900 AND (SELECT COUNT(*) FROM music_sessions_user_history msuh2 WHERE msuh1.music_session_id = msuh2.music_session_id @@ -131,11 +123,24 @@ SELECT COUNT(*) FROM ) played WHERE #{where} SQL - MusicSessionUserHistory.connection.execute(sql)[0]['count'].to_i end + def _subquery(assoc_key, num_user) + assoc = User.reflections[assoc_key] + start_date = all_time ? self.group_start : self.monthly_start + end_date = all_time ? self.group_end : self.monthly_end + sql =<= '#{start_date}' AND + tt.created_at <= '#{end_date}' +SQL + yield(sql) if block_given? + self.class.cohort_users(self).where("users.id IN (#{sql})").count + end + def _monthly! - unless 0 < num_user = self.class.user_attribute_within(:created_at, self).count + unless 0 < num_user = self.class.cohort_users(self).count self.update_attribute(:data_set, {}) return end @@ -143,40 +148,55 @@ SQL self.data_set['registered_users'] = num_user num_user = num_user.to_f - count = self.class.user_attribute_within(:first_downloaded_client_at, self).count - _put_data_set('first_downloaded_client_at', count, num_user) + qq = self.class.cohort_users(self) + .where(first_downloaded_client_at: self.monthly_start..self.monthly_end) + _put_data_set(:first_downloaded_client_at, qq.count, num_user) - count = self.class.user_attribute_within(:first_certified_gear_at, self).count - _put_data_set('first_certified_gear_at', count, num_user) + qq = self.class.cohort_users(self) + .where(first_certified_gear_at: self.monthly_start..self.monthly_end) + _put_data_set(:first_certified_gear_at, qq.count, num_user) - count = _attribute_within_monthly(InvitedUser, :created_at).count - _put_data_set('invited_users', count, num_user) + count = _subquery(assoc_key = :invited_users, num_user) + _put_data_set(assoc_key, count, num_user) - count = _attribute_within_monthly(RecordedTrack, :created_at).count - _put_data_set('recorded_tracks', count, num_user) + count = _subquery(assoc_key = :recorded_tracks, num_user) + _put_data_set(assoc_key, count, num_user) - count = _attribute_within_monthly(JamTrackRight, :created_at).count - _put_data_set('jam_track_rights', count, num_user) + count = _subquery(assoc_key = :jam_track_rights, num_user) + _put_data_set(assoc_key, count, num_user) - count = _attribute_within_monthly(JamTrackRight, :created_at) - .where(redeemed: true) - .count - _put_data_set('jam_track_rights_redeemed', count, num_user) + count = _subquery(assoc_key = :jam_track_rights, num_user) do |subsql| + subsql += " AND tt.redeemed = 't' " + end + _put_data_set(assoc_key, count, num_user) - count = _attribute_within_monthly(Friendship, :created_at) - .joins("INNER JOIN friendships AS fff ON fff.friend_id = friendships.user_id") - .where(['fff.created_at >= ? AND fff.created_at <= ?', - self.start_date, self.end_date]).count - count /= 2 - _put_data_set('friendships', count, num_user) + count = _subquery(assoc_key = :friendships, num_user) + _put_data_set(assoc_key, count, num_user) - count = self._monthly_played_online_count(1) - _put_data_set('music_sessions_user_history_1', count, num_user) + sql = _played_online_subquery(' = 1 ') + count = self.class.cohort_users(self).where("users.id IN (#{sql})").count + _put_data_set(:music_sessions_user_history_1, count, num_user) + sql = _played_online_subquery(2..5) + count = self.class.cohort_users(self).where("users.id IN (#{sql})").count + _put_data_set(:music_sessions_user_history_2_5, count, num_user) + + sql = _played_online_subquery(' >= 6') + count = self.class.cohort_users(self).where("users.id IN (#{sql})").count + _put_data_set(:music_sessions_user_history_6_, count, num_user) + + self.save! end + def _join_user_all_time(assoc_ref) + assoc_ref.active_record + .joins("INNER JOIN users AS uu ON uu.id = #{assoc_ref.foreign_key}") + .where(created_at: self.group_start..self.group_end) + .where(['uu.created_at >= ? AND uu.created_at <= ?', self.group_start, self.group_end]) + end + def _all_time! - unless 0 < num_user = self.class.user_attribute_within_monthly(:created_at, self).count + unless 0 < num_user = self.class.cohort_users(self).count self.update_attribute(:data_set, {}) return end @@ -184,40 +204,43 @@ SQL self.data_set['registered_users'] = num_user num_user = num_user.to_f - count = self.class.user_attribute_within(:first_downloaded_client_at, self) - .where(created_at: self.start_date..self.end_date) + count = self.class.cohort_users(self) + .where(['first_downloaded_client_at IS NOT NULL']) .count _put_data_set('first_downloaded_client_at', count, num_user) - count = self.class.user_attribute_within(:first_certified_gear_at, self) - .where(created_at: self.start_date..self.end_date) + count = self.class.cohort_users(self) + .where(['first_certified_gear_at IS NOT NULL']) .count _put_data_set('first_certified_gear_at', count, num_user) - count = _join_user_all_time(InvitedUser.reflections[:sender]).count - _put_data_set('invited_users', count, num_user) + count = _subquery(assoc_key = :invited_users, num_user) + _put_data_set(assoc_key, count, num_user) - count = _join_user_all_time(RecordedTrack.reflections[:user]).count - _put_data_set('recorded_tracks', count, num_user) + count = _subquery(assoc_key = :recorded_tracks, num_user) + _put_data_set(assoc_key, count, num_user) - count = _join_user_all_time(Friendship.reflections[:user]) - .joins("INNER JOIN friendships AS fff ON fff.friend_id = uu.id") - .where(['fff.created_at >= ? AND fff.created_at <= ?', - self.start_date, self.end_date]).count - count /= 2 - _put_data_set('friendships', count, num_user) + count = _subquery(assoc_key = :friendships, num_user) + _put_data_set(assoc_key, count, num_user) - count = _join_user_all_time(JamTrackRight.reflections[:user]).count - _put_data_set('jam_track_rights', count, num_user) + count = _subquery(assoc_key = :jam_track_rights, num_user) + _put_data_set(assoc_key, count, num_user) - count = _join_user_all_time(MusicSessionUserHistory.reflections[:user]).count - _put_data_set('music_sessions_user_history', count, num_user) + sql = _played_online_subquery(' >= 1') + count = self.class.cohort_users(self).where("users.id IN (#{sql})").count + _put_data_set(:music_sessions_user_history, count, num_user) self.save! end def populate! - self.cumulative ? _all_time! : _monthly! + self.all_time ? _all_time! : _monthly! + end + + def self.monthly_cohorts(monthly_start, monthly_end) + self.generate_monthly_cohorts(monthly_start, monthly_end).compact.each do |cc| + cc._monthly! + end end end diff --git a/db/up/cohorts.sql b/db/up/cohorts.sql index 2fae882ca..3a7cbbb5b 100644 --- a/db/up/cohorts.sql +++ b/db/up/cohorts.sql @@ -1,13 +1,19 @@ CREATE TABLE cohorts ( id VARCHAR(64) PRIMARY KEY DEFAULT uuid_generate_v4(), - start_date TIMESTAMP NOT NULL, - end_date TIMESTAMP NOT NULL, - cumulative BOOLEAN NOT NULL DEFAULT FALSE, data_set JSON NOT NULL DEFAULT '{}', + group_start TIMESTAMP NOT NULL, + group_end TIMESTAMP NOT NULL, + + all_time BOOLEAN NOT NULL DEFAULT FALSE, + monthly_start TIMESTAMP, + monthly_end TIMESTAMP, + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); -CREATE INDEX index_started_date ON cohorts USING btree (start_date); +CREATE INDEX index_group_date ON cohorts USING btree (group_start); + +CREATE INDEX msuh_music_session_idx ON music_sessions_user_history USING btree(music_session_id);