VRFS-2916 fixed monthly queries

This commit is contained in:
Jonathan Kolyer 2015-03-16 06:03:36 +00:00
parent 087ec9b1e0
commit 420c00e3b5
3 changed files with 126 additions and 89 deletions

View File

@ -0,0 +1,8 @@
# Registers the Cohort model with ActiveAdmin under the resource name
# 'Cohort Data'.
ActiveAdmin.register(Cohort, as: 'Cohort Data') do
  # Menu entry labelled 'Cohorts', placed under the 'Analytics' parent item.
  menu(label: 'Cohorts', parent: 'Analytics')

  # The index page is declared with an intentionally empty block.
  index do
  end
end

View File

@ -52,11 +52,10 @@ class Cohort < ActiveRecord::Base
.map { |d| [d.year,d.mon,d.day].first(prec) } .map { |d| [d.year,d.mon,d.day].first(prec) }
end end
def self.cohort_date_ranges(starting=nil, ending=nil) def self.cohort_group_ranges(starting=nil, ending=nil)
starting ||= User.where(admin: false).order(:created_at).first.created_at starting ||= User.where(admin: false).order(:created_at).first.created_at
ending ||= Time.now ending ||= Time.now
dates = self.date_tuples([starting.year, starting.month], dates = self.date_tuples([starting.year, starting.month], [ending.year, ending.month])
[ending.year, ending.month])
ranges = [] ranges = []
dates.each_with_index do |d1, idx| dates.each_with_index do |d1, idx|
d2 = dates[idx+1] || [Time.now.next_month.year,Time.now.next_month.month] d2 = dates[idx+1] || [Time.now.next_month.year,Time.now.next_month.month]
@ -66,63 +65,56 @@ class Cohort < ActiveRecord::Base
ranges ranges
end end
def self.monthly_cohorts(start_date, end_date) def self.generate_monthly_cohorts(monthly_start, monthly_end)
self.cohort_date_ranges(start_date, end_date).collect do |range| Cohort.delete_all(['all_time = ?',false])
unless cc = Cohort.where(start_date: range.first).where(cumulative: true).limit(1).first self.cohort_group_ranges.collect do |range|
next if range.first > monthly_start
cc = Cohort.new
cc.group_start = range.first
cc.group_end = range.last
cc.monthly_start = monthly_start
cc.monthly_end = monthly_end
cc.all_time = false
cc.save!
cc
end
end
def self.generate_all_time_cohorts
self.cohort_group_ranges.collect do |range|
unless cc = Cohort.where(group_start: range.first).where(all_time: true).limit(1).first
cc = Cohort.new cc = Cohort.new
cc.start_date = range.first cc.group_start = range.first
cc.end_date = range.last cc.group_end = range.last
cc.all_time = true
cc.save! cc.save!
end end
cc cc
end end
end end
def self.cumulative_cohorts
self.cohort_date_ranges.collect do |range|
unless cc = Cohort.where(start_date: range.first).where(cumulative: true).limit(1).first
cc = Cohort.new
cc.start_date = range.first
cc.end_date = range.last
cc.cumulative = true
cc.save!
end
cc
end
end
def _join_user_all_time(assoc_ref)
assoc_ref.active_record
.joins("INNER JOIN users AS uu ON uu.id = #{assoc_ref.foreign_key}")
.where(created_at: self.start_date..self.end_date)
.where(['uu.created_at >= ? AND uu.created_at <= ?',
self.start_date, self.end_date])
end
def _attribute_within_monthly(active_record, attrib)
active_record.where(attrib => self.start_date..self.end_date)
end
def _put_data_set(key, count, num_user) def _put_data_set(key, count, num_user)
self.data_set[key] = count self.data_set[key.to_s] = count
self.data_set["#{key}%"] = 100.0 * (count.to_f / num_user.to_f) self.data_set["#{key}%"] = 100.0 * (count.to_f / num_user.to_f)
end end
def self.user_attribute_within(attrib, cohort) def self.cohort_users(cohort)
User.where(attrib => cohort.start_date..cohort.end_date) User.where(created_at: cohort.group_start..cohort.group_end)
end end
def _monthly_played_online_count(constraint) def _played_online_subquery(constraint)
where = if constraint.is_a?(Range) where = if constraint.is_a?(Range)
"played.cnt >= #{constraint.first} AND played.cnt <= #{constraint.last}" "played.cnt >= #{constraint.first} AND played.cnt <= #{constraint.last}"
else else
"played.cnt = #{constraint}" "played.cnt #{constraint}"
end end
start_date = all_time ? self.group_start : self.monthly_start
end_date = all_time ? self.group_end : self.monthly_end
sql =<<SQL sql =<<SQL
SELECT COUNT(*) FROM SELECT played.user_id FROM
(SELECT COUNT(*) cnt FROM music_sessions_user_history msuh1 (SELECT user_id, COUNT(*) cnt FROM music_sessions_user_history msuh1
WHERE WHERE
msuh1.created_at >= '#{self.start_date}' AND msuh1.created_at <= '#{self.end_date}' AND msuh1.created_at >= '#{start_date}' AND msuh1.created_at <= '#{end_date}' AND
EXTRACT(EPOCH FROM (msuh1.session_removed_at - msuh1.created_at)) >= 900 AND EXTRACT(EPOCH FROM (msuh1.session_removed_at - msuh1.created_at)) >= 900 AND
(SELECT COUNT(*) FROM music_sessions_user_history msuh2 (SELECT COUNT(*) FROM music_sessions_user_history msuh2
WHERE msuh1.music_session_id = msuh2.music_session_id WHERE msuh1.music_session_id = msuh2.music_session_id
@ -131,11 +123,24 @@ SELECT COUNT(*) FROM
) played ) played
WHERE #{where} WHERE #{where}
SQL SQL
MusicSessionUserHistory.connection.execute(sql)[0]['count'].to_i
end end
# Counts the cohort's users that own at least one row of the given User
# association created inside the cohort's reporting window.
#
# The window is group_start..group_end when all_time is truthy, and
# monthly_start..monthly_end otherwise.
#
# @param assoc_key [Symbol] key into User.reflections naming the association
# @param num_user [Numeric] not used here; kept so existing callers still work
# @yield [sql] the generated subquery text, so a caller can add conditions
# @yieldreturn [String, nil] the extended subquery; any non-String value is
#   ignored and the subquery is used as it stands after the block ran
# @return the value of `count` on the cohort-user relation filtered by
#   "users.id IN (<subquery>)"
def _subquery(assoc_key, num_user)
  assoc = User.reflections[assoc_key]
  start_date = all_time ? self.group_start : self.monthly_start
  end_date = all_time ? self.group_end : self.monthly_end
  sql = <<-SQL
    SELECT #{assoc.foreign_key} FROM #{assoc.class_name.constantize.table_name} tt
    WHERE
    tt.created_at >= '#{start_date}' AND
    tt.created_at <= '#{end_date}'
  SQL
  if block_given?
    # Blocks written as `subsql += "..."` only rebind their own parameter, so
    # the extended text is visible solely through the block's return value.
    # Adopt it when it is a String; blocks that mutate in place (`<<`) keep
    # working because `sql` itself is the object they modified.
    extended = yield(sql)
    sql = extended if extended.is_a?(String)
  end
  self.class.cohort_users(self).where("users.id IN (#{sql})").count
end
def _monthly! def _monthly!
unless 0 < num_user = self.class.user_attribute_within(:created_at, self).count unless 0 < num_user = self.class.cohort_users(self).count
self.update_attribute(:data_set, {}) self.update_attribute(:data_set, {})
return return
end end
@ -143,40 +148,55 @@ SQL
self.data_set['registered_users'] = num_user self.data_set['registered_users'] = num_user
num_user = num_user.to_f num_user = num_user.to_f
count = self.class.user_attribute_within(:first_downloaded_client_at, self).count qq = self.class.cohort_users(self)
_put_data_set('first_downloaded_client_at', count, num_user) .where(first_downloaded_client_at: self.monthly_start..self.monthly_end)
_put_data_set(:first_downloaded_client_at, qq.count, num_user)
count = self.class.user_attribute_within(:first_certified_gear_at, self).count qq = self.class.cohort_users(self)
_put_data_set('first_certified_gear_at', count, num_user) .where(first_certified_gear_at: self.monthly_start..self.monthly_end)
_put_data_set(:first_certified_gear_at, qq.count, num_user)
count = _attribute_within_monthly(InvitedUser, :created_at).count count = _subquery(assoc_key = :invited_users, num_user)
_put_data_set('invited_users', count, num_user) _put_data_set(assoc_key, count, num_user)
count = _attribute_within_monthly(RecordedTrack, :created_at).count count = _subquery(assoc_key = :recorded_tracks, num_user)
_put_data_set('recorded_tracks', count, num_user) _put_data_set(assoc_key, count, num_user)
count = _attribute_within_monthly(JamTrackRight, :created_at).count count = _subquery(assoc_key = :jam_track_rights, num_user)
_put_data_set('jam_track_rights', count, num_user) _put_data_set(assoc_key, count, num_user)
count = _attribute_within_monthly(JamTrackRight, :created_at) count = _subquery(assoc_key = :jam_track_rights, num_user) do |subsql|
.where(redeemed: true) subsql += " AND tt.redeemed = 't' "
.count end
_put_data_set('jam_track_rights_redeemed', count, num_user) _put_data_set(assoc_key, count, num_user)
count = _attribute_within_monthly(Friendship, :created_at) count = _subquery(assoc_key = :friendships, num_user)
.joins("INNER JOIN friendships AS fff ON fff.friend_id = friendships.user_id") _put_data_set(assoc_key, count, num_user)
.where(['fff.created_at >= ? AND fff.created_at <= ?',
self.start_date, self.end_date]).count
count /= 2
_put_data_set('friendships', count, num_user)
count = self._monthly_played_online_count(1) sql = _played_online_subquery(' = 1 ')
_put_data_set('music_sessions_user_history_1', count, num_user) count = self.class.cohort_users(self).where("users.id IN (#{sql})").count
_put_data_set(:music_sessions_user_history_1, count, num_user)
sql = _played_online_subquery(2..5)
count = self.class.cohort_users(self).where("users.id IN (#{sql})").count
_put_data_set(:music_sessions_user_history_2_5, count, num_user)
sql = _played_online_subquery(' >= 6')
count = self.class.cohort_users(self).where("users.id IN (#{sql})").count
_put_data_set(:music_sessions_user_history_6_, count, num_user)
self.save!
end end
# Builds a relation on the association's owning model, inner-joined to users
# (aliased uu) through the association's foreign key, and restricted twice to
# this cohort's group window: once on created_at of the queried model and
# once on uu.created_at.
#
# @param assoc_ref an association reflection responding to `active_record`
#   and `foreign_key`
# @return the relation produced by the final `where` call
def _join_user_all_time(assoc_ref)
  window_start = self.group_start
  window_end = self.group_end

  joined = assoc_ref.active_record.joins(
    "INNER JOIN users AS uu ON uu.id = #{assoc_ref.foreign_key}"
  )
  in_window = joined.where(created_at: window_start..window_end)
  in_window.where(['uu.created_at >= ? AND uu.created_at <= ?', window_start, window_end])
end
def _all_time! def _all_time!
unless 0 < num_user = self.class.user_attribute_within_monthly(:created_at, self).count unless 0 < num_user = self.class.cohort_users(self).count
self.update_attribute(:data_set, {}) self.update_attribute(:data_set, {})
return return
end end
@ -184,40 +204,43 @@ SQL
self.data_set['registered_users'] = num_user self.data_set['registered_users'] = num_user
num_user = num_user.to_f num_user = num_user.to_f
count = self.class.user_attribute_within(:first_downloaded_client_at, self) count = self.class.cohort_users(self)
.where(created_at: self.start_date..self.end_date) .where(['first_downloaded_client_at IS NOT NULL'])
.count .count
_put_data_set('first_downloaded_client_at', count, num_user) _put_data_set('first_downloaded_client_at', count, num_user)
count = self.class.user_attribute_within(:first_certified_gear_at, self) count = self.class.cohort_users(self)
.where(created_at: self.start_date..self.end_date) .where(['first_certified_gear_at IS NOT NULL'])
.count .count
_put_data_set('first_certified_gear_at', count, num_user) _put_data_set('first_certified_gear_at', count, num_user)
count = _join_user_all_time(InvitedUser.reflections[:sender]).count count = _subquery(assoc_key = :invited_users, num_user)
_put_data_set('invited_users', count, num_user) _put_data_set(assoc_key, count, num_user)
count = _join_user_all_time(RecordedTrack.reflections[:user]).count count = _subquery(assoc_key = :recorded_tracks, num_user)
_put_data_set('recorded_tracks', count, num_user) _put_data_set(assoc_key, count, num_user)
count = _join_user_all_time(Friendship.reflections[:user]) count = _subquery(assoc_key = :friendships, num_user)
.joins("INNER JOIN friendships AS fff ON fff.friend_id = uu.id") _put_data_set(assoc_key, count, num_user)
.where(['fff.created_at >= ? AND fff.created_at <= ?',
self.start_date, self.end_date]).count
count /= 2
_put_data_set('friendships', count, num_user)
count = _join_user_all_time(JamTrackRight.reflections[:user]).count count = _subquery(assoc_key = :jam_track_rights, num_user)
_put_data_set('jam_track_rights', count, num_user) _put_data_set(assoc_key, count, num_user)
count = _join_user_all_time(MusicSessionUserHistory.reflections[:user]).count sql = _played_online_subquery(' >= 1')
_put_data_set('music_sessions_user_history', count, num_user) count = self.class.cohort_users(self).where("users.id IN (#{sql})").count
_put_data_set(:music_sessions_user_history, count, num_user)
self.save! self.save!
end end
def populate! def populate!
self.cumulative ? _all_time! : _monthly! self.all_time ? _all_time! : _monthly!
end
# Generates the monthly cohorts for the given window and runs `_monthly!` on
# each one that was actually produced (nil entries are dropped first).
#
# @param monthly_start start of the monthly window, passed through to
#   generate_monthly_cohorts
# @param monthly_end end of the monthly window, passed through likewise
# @return [Array] the non-nil cohorts, after `_monthly!` was called on each
def self.monthly_cohorts(monthly_start, monthly_end)
  generated = self.generate_monthly_cohorts(monthly_start, monthly_end)
  cohorts = generated.compact
  cohorts.each { |cohort| cohort._monthly! }
  cohorts
end
end end

View File

@ -1,13 +1,19 @@
CREATE TABLE cohorts ( CREATE TABLE cohorts (
id VARCHAR(64) PRIMARY KEY DEFAULT uuid_generate_v4(), id VARCHAR(64) PRIMARY KEY DEFAULT uuid_generate_v4(),
start_date TIMESTAMP NOT NULL,
end_date TIMESTAMP NOT NULL,
cumulative BOOLEAN NOT NULL DEFAULT FALSE,
data_set JSON NOT NULL DEFAULT '{}', data_set JSON NOT NULL DEFAULT '{}',
group_start TIMESTAMP NOT NULL,
group_end TIMESTAMP NOT NULL,
all_time BOOLEAN NOT NULL DEFAULT FALSE,
monthly_start TIMESTAMP,
monthly_end TIMESTAMP,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
); );
CREATE INDEX index_started_date ON cohorts USING btree (start_date); CREATE INDEX index_group_date ON cohorts USING btree (group_start);
CREATE INDEX msuh_music_session_idx ON music_sessions_user_history USING btree(music_session_id);