* VRFS-2613 - optimizing hourly cleanup by moving it to the score report routine

This commit is contained in:
Seth Call 2015-01-30 10:46:02 -06:00
parent 96bc9487c4
commit 7352fa9b19
2 changed files with 125 additions and 29 deletions

View File

@ -1,11 +1,89 @@
-- discard_scores(keep): trims the scores table so that, for every
-- (alocidispid, blocidispid) pair, only the `keep` most recent rows
-- (ordered by score_dt) remain.
--
-- Both signatures are dropped first: the zero-argument one is what the
-- original statement targeted, and the one-argument one is what is created
-- below; without dropping it, a plain CREATE FUNCTION fails when the function
-- already exists.
DROP FUNCTION IF EXISTS discard_scores();
DROP FUNCTION IF EXISTS discard_scores(INTEGER);
CREATE FUNCTION discard_scores (keep INTEGER) RETURNS VOID AS $$
BEGIN
    -- The correlated subquery picks the score_dt of the keep-th newest row of
    -- the same pair (OFFSET keep - 1); every strictly older row is deleted.
    -- When a pair has fewer than `keep` rows the subquery yields NULL, the
    -- comparison is NULL, and nothing is deleted for that pair.
    DELETE FROM scores WHERE score_dt <
        (SELECT score_dt
           FROM scores s
          WHERE s.alocidispid = scores.alocidispid
            AND s.blocidispid = scores.blocidispid
          ORDER BY score_dt DESC
          LIMIT 1 OFFSET (keep - 1));
    RETURN;
END;
$$ LANGUAGE plpgsql;
-- update_current_network_scores(aloc, bloc): refreshes the entries of
-- current_network_scores for the pair (aloc, bloc) and its mirror (bloc, aloc)
-- from the most recent rows of scores, then trims scores for that pair.
--
-- Steps:
--   1. Read the 6 most recent scores for (aloc, bloc). The newest becomes the
--      candidate value; the remaining (up to 5) older ones are averaged.
--   2. If there are no scores at all, remove the current entries for the pair
--      and its mirror (nothing is inserted in that case).
--   3. Otherwise clamp the candidate to [4/5, 6/5] of the average of the older
--      scores (when any exist) and upsert it for both directions.
--   4. Keep only the 5 most recent scores rows per direction:
--      (aloc, bloc) with scorer = 0 and (bloc, aloc) with scorer = 1.
--
-- STRICT: the function returns NULL without running if either argument is NULL.
CREATE OR REPLACE FUNCTION update_current_network_scores(aloc BIGINT, bloc BIGINT) RETURNS VOID
STRICT VOLATILE AS $$
DECLARE
    newscore INTEGER;
    newscore_dt TIMESTAMP;
    newscore_limited BOOL;
    sum INTEGER;
    kount INTEGER;
    r RECORD;
    avgscore INTEGER;
    maxscore INTEGER;
    minscore INTEGER;
BEGIN
    -- find the 6 most recent scores
    -- (supposedly newscore is the first...)
    -- hybrid scheme: compute the average of some recent scores, then limit newscore to be between 4/5 and 6/5 of the average
    newscore := NULL;
    newscore_dt := NULL;
    newscore_limited := FALSE;
    sum := 0;
    kount := 0;
    FOR r IN SELECT score, score_dt FROM scores WHERE alocidispid = aloc AND blocidispid = bloc ORDER BY score_dt DESC LIMIT 6 LOOP
        IF newscore IS NULL THEN
            -- first row of the descending scan: the most recent score
            newscore := r.score;
            newscore_dt := r.score_dt;
        ELSE
            sum := sum + r.score;
            kount := kount + 1;
        END IF;
    END LOOP;

    IF newscore IS NULL THEN
        -- no scores in query at all: delete any current entry. The upsert
        -- below must not run here, otherwise the UPDATE finds nothing and the
        -- INSERT re-creates the rows just deleted with a NULL score.
        DELETE FROM current_network_scores WHERE alocidispid = aloc AND blocidispid = bloc;
        IF aloc != bloc THEN
            DELETE FROM current_network_scores WHERE alocidispid = bloc AND blocidispid = aloc;
        END IF;
    ELSE
        -- if there are scores older than newscore, then use their average to limit the range of newscore
        IF kount > 0 THEN
            -- integer arithmetic throughout: the average and both bounds are truncated
            avgscore := sum / kount;
            maxscore := avgscore*6/5;
            minscore := avgscore*4/5;
            -- the score newscore will be inserted as the current value in current_network_scores, but we will limit it
            -- to be no greater than 120% of the average and no less than 80% of the average. this will dampen wild
            -- swings in the scores.
            IF newscore > maxscore THEN
                newscore := maxscore;
                newscore_limited := TRUE;
            ELSIF newscore < minscore THEN
                newscore := minscore;
                newscore_limited := TRUE;
            END IF;
        END IF;

        -- upsert (aloc, bloc): update in place, insert when no row was updated
        UPDATE current_network_scores SET score = newscore, limited = newscore_limited, score_dt = newscore_dt WHERE alocidispid = aloc AND blocidispid = bloc;
        IF NOT FOUND THEN
            INSERT INTO current_network_scores (alocidispid, blocidispid, score, limited, score_dt) VALUES (aloc, bloc, newscore, newscore_limited, newscore_dt);
        END IF;

        -- mirror the same value for (bloc, aloc); skipped when both ids are equal
        -- because that row was already written above
        IF aloc != bloc THEN
            UPDATE current_network_scores SET score = newscore, limited = newscore_limited, score_dt = newscore_dt WHERE alocidispid = bloc AND blocidispid = aloc;
            IF NOT FOUND THEN
                INSERT INTO current_network_scores (alocidispid, blocidispid, score, limited, score_dt) VALUES (bloc, aloc, newscore, newscore_limited, newscore_dt);
            END IF;
        END IF;
    END IF;

    -- keep the scores table clean, meaning only up to the 5 most recent scores per group & direction (scorer).
    -- OFFSET 4 selects the 5th newest score_dt; strictly older rows are removed. With fewer than 5 rows the
    -- subquery is NULL and nothing is deleted.
    DELETE FROM scores WHERE alocidispid = aloc AND blocidispid = bloc AND scorer = 0 AND score_dt <
        (SELECT score_dt FROM scores s WHERE s.alocidispid = aloc AND s.blocidispid = bloc AND s.scorer = 0 ORDER BY score_dt DESC LIMIT 1 OFFSET 4);
    DELETE FROM scores WHERE alocidispid = bloc AND blocidispid = aloc AND scorer = 1 AND score_dt <
        (SELECT score_dt FROM scores s WHERE s.alocidispid = bloc AND s.blocidispid = aloc AND s.scorer = 1 ORDER BY score_dt DESC LIMIT 1 OFFSET 4);
END;
$$ LANGUAGE plpgsql;

View File

@ -527,24 +527,25 @@ describe Score do
end
it "discards over 5 items" do
Score.createx(LOCA, NODEA, ADDRA, LOCB, NODEB, ADDRB, 20, nil)
Score.createx(LOCA, NODEA, ADDRA, LOCB, NODEB, ADDRB, 20, nil)
Score.createx(LOCA, NODEA, ADDRA, LOCB, NODEB, ADDRB, 20, nil)
Score.createx(LOCA, NODEA, ADDRA, LOCB, NODEB, ADDRB, 20, nil)
Score.createx(LOCA, NODEA, ADDRA, LOCB, NODEB, ADDRB, 20, nil)
Score.createx(LOCA, NODEA, ADDRA, LOCB, NODEB, ADDRB, 20, nil)
Score.createx(LOCA, NODEA, ADDRA, LOCB, NODEB, ADDRB, 20, 6.days.ago)
Score.createx(LOCA, NODEA, ADDRA, LOCB, NODEB, ADDRB, 20, 5.days.ago)
Score.createx(LOCA, NODEA, ADDRA, LOCB, NODEB, ADDRB, 20, 4.days.ago)
Score.createx(LOCA, NODEA, ADDRA, LOCB, NODEB, ADDRB, 20, 3.days.ago)
Score.createx(LOCA, NODEA, ADDRA, LOCB, NODEB, ADDRB, 20, 2.days.ago)
Score.count.should == 12
Score.connection.execute("SELECT discard_scores(5)").check
Score.count.should == 12
Score.count.should == 10
Score.createx(LOCA, NODEA, ADDRA, LOCB, NODEB, ADDRB, 26, nil)
Score.connection.execute("UPDATE scores set created_at = TIMESTAMP '#{2.days.ago}' WHERE score = 26").cmdtuples.should == 2
Score.connection.execute("SELECT discard_scores(5)").check
Score.count.should == 12
Score.connection.execute("SELECT * FROM scores WHERE score = 20").ntuples.should == 12
Score.connection.execute("SELECT * FROM scores WHERE scorer = 0").ntuples.should == 6
Score.connection.execute("SELECT * FROM scores WHERE scorer = 1").ntuples.should == 6
Score.createx(LOCA, NODEA, ADDRA, LOCB, NODEB, ADDRB, 20, 1.days.ago)
Score.count.should == 10
# make a score older than all the rest; it should get whacked
Score.createx(LOCA, NODEA, ADDRA, LOCB, NODEB, ADDRB, 26, 7.days.ago)
Score.count.should == 10
Score.connection.execute("SELECT * FROM scores WHERE score = 20").ntuples.should == 10
Score.connection.execute("SELECT * FROM scores WHERE scorer = 0").ntuples.should == 5
Score.connection.execute("SELECT * FROM scores WHERE scorer = 1").ntuples.should == 5
Score.createx(LOCB, NODEB, ADDRB, LOCA, NODEA, ADDRA, 22, nil)
@ -554,18 +555,35 @@ describe Score do
Score.createx(LOCB, NODEB, ADDRB, LOCA, NODEA, ADDRA, 22, nil)
Score.createx(LOCB, NODEB, ADDRB, LOCA, NODEA, ADDRA, 22, nil)
Score.count.should == 24
Score.connection.execute("SELECT discard_scores(5)").check
Score.count.should == 24
Score.count.should == 20
Score.createx(LOCB, NODEB, ADDRB, LOCA, NODEA, ADDRA, 36, nil)
Score.connection.execute("UPDATE scores set created_at = TIMESTAMP '#{2.days.ago}' WHERE score = 36").cmdtuples.should == 2
Score.connection.execute("SELECT discard_scores(5)").check
Score.count.should == 24
Score.connection.execute("SELECT * FROM scores WHERE score = 22").ntuples.should == 12
Score.connection.execute("SELECT * FROM scores WHERE score = 22 AND scorer = 0").ntuples.should == 6
Score.connection.execute("SELECT * FROM scores WHERE score = 22 AND scorer = 1").ntuples.should == 6
Score.connection.execute("SELECT * FROM scores WHERE score = 22").ntuples.should == 10
Score.connection.execute("SELECT * FROM scores WHERE score = 20").ntuples.should == 10
Score.connection.execute("SELECT * FROM scores WHERE scorer = 0").ntuples.should == 10
Score.connection.execute("SELECT * FROM scores WHERE scorer = 1").ntuples.should == 10
Score.createx(LOCB, NODEB, ADDRB, LOCA, NODEA, ADDRA, 36, 7.days.ago)
Score.count.should == 20
Score.connection.execute("SELECT * FROM scores WHERE score = 22").ntuples.should == 10
Score.connection.execute("SELECT * FROM scores WHERE score = 20").ntuples.should == 10
Score.connection.execute("SELECT * FROM scores WHERE scorer = 0").ntuples.should == 10
Score.connection.execute("SELECT * FROM scores WHERE scorer = 1").ntuples.should == 10
# let's create scores between a new location, and make sure they don't disturb the data we have now
Score.createx(LOCC, NODEC, ADDRC, LOCA, NODEA, ADDRA, 10, nil)
Score.count.should == 22
Score.createx(LOCC, NODEC, ADDRC, LOCA, NODEA, ADDRA, 10, nil)
Score.createx(LOCC, NODEC, ADDRC, LOCA, NODEA, ADDRA, 10, nil)
Score.createx(LOCC, NODEC, ADDRC, LOCA, NODEA, ADDRA, 10, nil)
Score.createx(LOCC, NODEC, ADDRC, LOCA, NODEA, ADDRA, 10, nil)
Score.count.should == 30
Score.connection.execute("SELECT * FROM scores WHERE score = 20").ntuples.should == 10
Score.connection.execute("SELECT * FROM scores WHERE score = 22").ntuples.should == 10
Score.connection.execute("SELECT * FROM scores WHERE score = 10").ntuples.should == 10
end
end
end