From 5265477384a41cd530b27101899ccfc5cb1cf23d Mon Sep 17 00:00:00 2001
From: "Karl O. Pinc" <kop@karlpinc.com>
Date: Sat, 11 Oct 2025 23:42:58 +0000
Subject: [PATCH] Split load_data.sql into multiple files to support staged
 conversion

See the conversion/Makefile comment for more info on staged conversion
and using db statistics.
---
 conversion/Makefile                           |  72 +-
 conversion/load_biography.sql                 |  90 +++
 conversion/load_biography_update_log.sql      |  44 ++
 conversion/load_community_membership.sql      |  43 ++
 .../load_community_membership_update_log.sql  |  45 ++
 conversion/load_data.sql                      | 615 ------------------
 conversion/load_finish.sql                    |  32 +
 conversion/load_follow_arrival.sql            | 198 ++++++
 conversion/load_follow_to_follows.sql         | 155 +++++
 conversion/load_follow_to_other.sql           |  68 ++
 conversion/load_support.sql                   | 131 ++++
 11 files changed, 876 insertions(+), 617 deletions(-)
 create mode 100644 conversion/load_biography.sql
 create mode 100644 conversion/load_biography_update_log.sql
 create mode 100644 conversion/load_community_membership.sql
 create mode 100644 conversion/load_community_membership_update_log.sql
 delete mode 100644 conversion/load_data.sql
 create mode 100644 conversion/load_finish.sql
 create mode 100644 conversion/load_follow_arrival.sql
 create mode 100644 conversion/load_follow_to_follows.sql
 create mode 100644 conversion/load_follow_to_other.sql
 create mode 100644 conversion/load_support.sql

diff --git a/conversion/Makefile b/conversion/Makefile
index e5b0d31..1f23e01 100644
--- a/conversion/Makefile
+++ b/conversion/Makefile
@@ -260,8 +260,76 @@ after_make_clean: load_data
 
 ## load_data	Copy the data from the clean schema into the
 ##		production schemas.
+# Loading is done in stages because a given connection caches query
+# plans and trigger functions, so within a single connection database
+# statistics do not, in effect, ever update.  Because there are no
+# statistics available when there's no data in a table, the query
+# planner does not make good plans.  So loading into an empty table
+# can be very slow, especially if the other tables in the db have
+# no statistics either.  Loading in multiple connections, and having
+# each connection end with an ANALYZE of the tables it loads, solves
+# this problem.
+# See:
+# https://www.postgresql.org/docs/current/plpgsql-implementation.html#PLPGSQL-PLAN-CACHING
 .PHONY: load_data
-load_data: $(PSQL_DEPENDS)
+load_data: load_support load_biography load_community_membership \
+	load_biography_update_log load_community_membership_update_log \
+	load_follow_to_follows load_follow_to_other \
+	load_follow_arrival \
+	load_finish
+
+.PHONY: load_support
+load_support: $(PSQL_DEPENDS)
 	( $(PSQL_SETUP) \
-	  cat load_data.sql ;) \
+	  cat load_support.sql ;) \
+	| psql $(PSQL_ARGS) $(PSQL_SINGLE_TRANS) --set=ON_ERROR_STOP=y
+
+# Data tables, by input table
+
+.PHONY: load_biography
+load_biography: $(PSQL_DEPENDS)
+	( $(PSQL_SETUP) \
+	  cat load_biography.sql ;) \
+	| psql $(PSQL_ARGS) $(PSQL_SINGLE_TRANS) --set=ON_ERROR_STOP=y
+
+.PHONY: load_community_membership
+load_community_membership: $(PSQL_DEPENDS)
+	( $(PSQL_SETUP) \
+	  cat load_community_membership.sql ;) \
+	| psql $(PSQL_ARGS) $(PSQL_SINGLE_TRANS) --set=ON_ERROR_STOP=y
+
+.PHONY: load_biography_update_log
+load_biography_update_log: $(PSQL_DEPENDS)
+	( $(PSQL_SETUP) \
+	  cat load_biography_update_log.sql ;) \
+	| psql $(PSQL_ARGS) $(PSQL_SINGLE_TRANS) --set=ON_ERROR_STOP=y
+
+.PHONY: load_community_membership_update_log
+load_community_membership_update_log: $(PSQL_DEPENDS)
+	( $(PSQL_SETUP) \
+	  cat load_community_membership_update_log.sql ;) \
+	| psql $(PSQL_ARGS) $(PSQL_SINGLE_TRANS) --set=ON_ERROR_STOP=y
+
+.PHONY: load_follow_to_follows
+load_follow_to_follows: $(PSQL_DEPENDS)
+	( $(PSQL_SETUP) \
+	  cat load_follow_to_follows.sql ;) \
+	| psql $(PSQL_ARGS) $(PSQL_SINGLE_TRANS) --set=ON_ERROR_STOP=y
+
+.PHONY: load_follow_to_other
+load_follow_to_other: $(PSQL_DEPENDS)
+	( $(PSQL_SETUP) \
+	  cat load_follow_to_other.sql ;) \
+	| psql $(PSQL_ARGS) $(PSQL_SINGLE_TRANS) --set=ON_ERROR_STOP=y
+
+.PHONY: load_follow_arrival
+load_follow_arrival: $(PSQL_DEPENDS)
+	( $(PSQL_SETUP) \
+	  cat load_follow_arrival.sql ;) \
+	| psql $(PSQL_ARGS) $(PSQL_SINGLE_TRANS) --set=ON_ERROR_STOP=y
+
+.PHONY: load_finish
+load_finish: $(PSQL_DEPENDS)
+	( $(PSQL_SETUP) \
+	  cat load_finish.sql ;) \
 	| psql $(PSQL_ARGS) $(PSQL_SINGLE_TRANS) --set=ON_ERROR_STOP=y
diff --git a/conversion/load_biography.sql b/conversion/load_biography.sql
new file mode 100644
index 0000000..bda92a2
--- /dev/null
+++ b/conversion/load_biography.sql
@@ -0,0 +1,90 @@
+-- Copyright (C) 2023, 2024, 2025 The Meme Factory, Inc.
+-- http://www.karlpinc.com/
+--
+-- This program is free software: you can redistribute it and/or modify
+-- it under the terms of the GNU Affero General Public License as
+-- published by the Free Software Foundation, either version 3 of the
+-- License, or (at your option) any later version.
+--
+-- This program is distributed in the hope that it will be useful,
+-- but WITHOUT ANY WARRANTY; without even the implied warranty of
+-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+-- GNU Affero General Public License for more details.
+--
+-- You should have received a copy of the GNU Affero General Public License
+-- along with this program. If not, see <https://www.gnu.org/licenses/>.
+--
+-- Karl O. Pinc
+
+-- Setup the search path
+SET search_path TO sokwedb, codes, lib, clean, libconv;
+
+
+-- Data tables
+
+-- biography -> biography_data
+INSERT INTO biography_data(
+    animid
+   ,animidnum
+   ,animname
+   ,birthcomm
+   ,bccertainty
+   ,sex
+   ,momid
+   ,dadid
+   ,dadprelim
+   ,dadidpub
+   ,firstborn
+   ,birthdate
+   ,bdmin
+   ,bdmax
+   ,bddist
+   ,entrydate
+   ,entrytype
+   ,departdate
+   ,departtype)
+  SELECT
+      b_animid
+    , b_animid_num::INT
+    , b_animname
+    , b_birthgroup
+    , b_bgcertainty
+    , b_sex
+    , b_momid
+    , CASE WHEN split_part(b_dadid, '_', 2) IS NOT DISTINCT FROM 'prelim'
+             THEN split_part(b_dadid, '_', 1)
+           WHEN b_dadid = ''
+             THEN NULL
+           ELSE b_dadid
+      END CASE
+    , CASE WHEN (b_dadid IS NULL
+                 OR b_dadid = '')
+             THEN NULL
+           WHEN split_part(b_dadid, '_', 2) = 'prelim'
+             THEN TRUE
+           ELSE FALSE
+      END CASE
+    , CASE WHEN b_dadid_publication_info = ''
+                AND (b_dadid IS NOT NULL
+                     AND b_dadid <> '')
+             THEN 'Unknown'
+           WHEN b_dadid_publication_info = ''
+                AND (b_dadid IS NULL
+                     OR b_dadid = '')
+             THEN NULL
+           ELSE b_dadid_publication_info
+      END CASE
+    , b_firstborn
+    , b_birthdate
+    , b_bdmin
+    , b_bdmax
+    , b_bddist
+    , b_entrydate
+    , b_entrytype
+    , b_departdate
+    -- Discard b_departdateerror
+    , b_departtype
+    FROM clean.biography
+;
+
+ANALYZE biography_data;
diff --git a/conversion/load_biography_update_log.sql b/conversion/load_biography_update_log.sql
new file mode 100644
index 0000000..44c5a4e
--- /dev/null
+++ b/conversion/load_biography_update_log.sql
@@ -0,0 +1,44 @@
+-- Copyright (C) 2023, 2024, 2025 The Meme Factory, Inc.
+-- http://www.karlpinc.com/
+--
+-- This program is free software: you can redistribute it and/or modify
+-- it under the terms of the GNU Affero General Public License as
+-- published by the Free Software Foundation, either version 3 of the
+-- License, or (at your option) any later version.
+--
+-- This program is distributed in the hope that it will be useful,
+-- but WITHOUT ANY WARRANTY; without even the implied warranty of
+-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+-- GNU Affero General Public License for more details.
+--
+-- You should have received a copy of the GNU Affero General Public License
+-- along with this program. If not, see <https://www.gnu.org/licenses/>.
+--
+-- Karl O. Pinc
+
+-- Setup the search path
+SET search_path TO sokwedb, codes, lib, clean, libconv;
+
+
+-- biography_update_log -> biography_log
+INSERT INTO biography_log (
+    dateofupdate
+   ,animid
+   ,description
+   ,rationale
+   ,madeby)
+  SELECT
+      date_of_update
+    , chimp_id
+    , update_description
+    , update_rationale
+    , CASE -- This solves NULL data problems; problem #19
+        WHEN made_by IS NULL
+          THEN 'UNK'
+        ELSE made_by
+      END CASE
+    FROM biography_update_log as log
+    -- Get a consistent id generation
+    ORDER BY date_of_update, chimp_id, update_rationale, made_by;
+
+ANALYZE biography_log;
diff --git a/conversion/load_community_membership.sql b/conversion/load_community_membership.sql
new file mode 100644
index 0000000..52abc2c
--- /dev/null
+++ b/conversion/load_community_membership.sql
@@ -0,0 +1,43 @@
+-- Copyright (C) 2023, 2024, 2025 The Meme Factory, Inc.
+-- http://www.karlpinc.com/
+--
+-- This program is free software: you can redistribute it and/or modify
+-- it under the terms of the GNU Affero General Public License as
+-- published by the Free Software Foundation, either version 3 of the
+-- License, or (at your option) any later version.
+--
+-- This program is distributed in the hope that it will be useful,
+-- but WITHOUT ANY WARRANTY; without even the implied warranty of
+-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+-- GNU Affero General Public License for more details.
+--
+-- You should have received a copy of the GNU Affero General Public License
+-- along with this program. If not, see <https://www.gnu.org/licenses/>.
+--
+-- Karl O. Pinc
+
+-- Setup the search path
+SET search_path TO sokwedb, codes, lib, clean, libconv;
+
+
+
+-- community_membership -> comm_membs
+INSERT INTO comm_membs (
+    animid
+   ,startdate
+   ,enddate
+   ,commid
+   ,startsource
+   ,endsource)
+  SELECT
+      cm_b_animid
+    , cm_start_date
+    , cm_end_date
+    , cm_cl_community_id
+    , cm_start_source
+    , cm_end_source
+    FROM community_membership
+    -- Get a consistent ID generation
+    ORDER BY cm_start_date, cm_end_date, cm_b_animid, cm_cl_community_id;
+
+ANALYZE comm_membs;
diff --git a/conversion/load_community_membership_update_log.sql b/conversion/load_community_membership_update_log.sql
new file mode 100644
index 0000000..633aea2
--- /dev/null
+++ b/conversion/load_community_membership_update_log.sql
@@ -0,0 +1,45 @@
+-- Copyright (C) 2023, 2024, 2025 The Meme Factory, Inc.
+-- http://www.karlpinc.com/
+--
+-- This program is free software: you can redistribute it and/or modify
+-- it under the terms of the GNU Affero General Public License as
+-- published by the Free Software Foundation, either version 3 of the
+-- License, or (at your option) any later version.
+--
+-- This program is distributed in the hope that it will be useful,
+-- but WITHOUT ANY WARRANTY; without even the implied warranty of
+-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+-- GNU Affero General Public License for more details.
+--
+-- You should have received a copy of the GNU Affero General Public License
+-- along with this program. If not, see <https://www.gnu.org/licenses/>.
+--
+-- Karl O. Pinc
+
+-- Setup the search path
+SET search_path TO sokwedb, codes, lib, clean, libconv;
+
+
+
+-- community_membership_update_log -> comm_memb_log
+INSERT INTO comm_memb_log (
+    dateofupdate
+   ,animid
+   ,description
+   ,rationale
+   ,madeby)
+  SELECT
+      date_of_update
+    , chimp_id
+    , update_description
+    , update_rationale
+    , CASE -- This solves NULL data problems; problems #18, #16
+        WHEN made_by IS NULL
+          THEN 'UNK'
+        ELSE made_by
+      END CASE
+    FROM clean.community_membership_update_log AS log
+    -- Get a consistent id generation
+    ORDER BY date_of_update, chimp_id, update_rationale, made_by;
+
+ANALYZE comm_memb_log;
diff --git a/conversion/load_data.sql b/conversion/load_data.sql
deleted file mode 100644
index ae17855..0000000
--- a/conversion/load_data.sql
+++ /dev/null
@@ -1,615 +0,0 @@
--- Copyright (C) 2023, 2024, 2025 The Meme Factory, Inc.
--- http://www.karlpinc.com/
---
--- This program is free software: you can redistribute it and/or modify
--- it under the terms of the GNU Affero General Public License as
--- published by the Free Software Foundation, either version 3 of the
--- License, or (at your option) any later version.
---
--- This program is distributed in the hope that it will be useful,
--- but WITHOUT ANY WARRANTY; without even the implied warranty of
--- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
--- GNU Affero General Public License for more details.
---
--- You should have received a copy of the GNU Affero General Public License
--- along with this program. If not, see <https://www.gnu.org/licenses/>.
---
--- Karl O.
Pinc - --- Setup the search path -SET search_path TO sokwedb, codes, lib, clean, libconv; - - --- Support tables - --- arrival_sources --- From the follow_arrival part of the MS Word doc for the MS Access db -INSERT INTO arrival_sources (code, description) - VALUES ('Tiki', 'Data came from Tiki (either typed or written in)') - , ('Tiki_Mom' - , 'Juvenile was assumed to be with their mother.' - || ' Duplicated mother’s arrival for juvenile.') - , ('Tiki_ID' - , 'Juvenile was present with another chimp (e.g. sister).' - || ' Used that chimp’s tiki arrival for juvenile.') - , ('Brec' - , 'Juvenile arrival reconstructed from Brec notes.'); - --- certainties --- From the MS Word chimp document -INSERT INTO certainties (certainty, description) - VALUES ('0', 'Chimp suspected to be nearby (maybe hidden for the moment)') - , ('1', 'Chimp definitely seen'); - - --- comm_membs_sources --- Obtain these from community_membership -INSERT INTO comm_membs_sources (commmembssource, description) - WITH sources AS ( - SELECT cm_start_source AS source - FROM clean.community_membership - GROUP BY cm_start_source - UNION - SELECT cm_end_source AS source - FROM clean.community_membership - GROUP BY cm_end_source) - SELECT source, source - FROM sources - GROUP BY source; - --- community_lookup -> comm_ids -INSERT INTO comm_ids (commid, name, notes, membcriteria) - SELECT community_lookup.cl_community_id - , community_lookup.cl_community_name - , community_lookup.cl_details - , '' - FROM community_lookup; --- Problem #12 -- hacked here -insert into comm_ids (commid, name, notes, membcriteria) - values ('KL', 'Kalande', '', '') - , ('KL_KK', 'Kasekela/kalande', '', ''); - --- cycle_states -INSERT INTO cycle_states (code, description) - VALUES ('0', 'Not swollen') - , ('0.25', '1/4 swollen') - , ('0.5', '1/2 swollen') - , ('0.75', '3/4 swollen') - , ('1', 'Fully swollen') - , ('U', 'Adolescent swelling') - , ('n/a', 'Male') - , ('MISS', 'Missing data'); -- problem #42 - --- departtypes -INSERT INTO departtypes (departtype, description) - VALUES ('D', 'Death') - , ('E', 'Emigration') - , ('P', 'Permanent disappearance') - , ('O', 'End of observation; Present in the most recent census') - , ('X', 'Used for unknown individuals'); - -show search_path; --- entrytypes -INSERT INTO entrytypes (entrytype, description) - VALUES ('B', 'Birth') - , ('I', 'Immigration') - , ('C', 'Start of confirmed AnimID') - , ('O', 'Initiation of close observation') - , ('X', 'Used for unknown individuals'); - --- people --- Copied from clean.people, which we manually constructed. 
-INSERT INTO people (person, name, description, active) - SELECT person, name, description, TRUE - FROM clean.people; - - --- obs_periods -INSERT INTO obs_periods (period, description) - VALUES ('AM', 'Morning') - , ('PM', 'Afternoon') - , ('UNK', 'Unknown'); - --- role_codes -INSERT INTO role_codes (role, description) - VALUES ('Actor', 'The initator in a directed dyadic interaction') - , ('Actee', 'The receipient in a directed dyadic interaction') - , ('Solo', 'A participant in an event, non-directed when there are mulitple participants'); - --- Data tables - --- biography -> biography_data -INSERT INTO biography_data( - animid - ,animidnum - ,animname - ,birthcomm - ,bccertainty - ,sex - ,momid - ,dadid - ,dadprelim - ,dadidpub - ,firstborn - ,birthdate - ,bdmin - ,bdmax - ,bddist - ,entrydate - ,entrytype - ,departdate - ,departtype) - SELECT - b_animid - , b_animid_num::INT - , b_animname - , b_birthgroup - , b_bgcertainty - , b_sex - , b_momid - , CASE WHEN split_part(b_dadid, '_', 2) IS NOT DISTINCT FROM 'prelim' - THEN split_part(b_dadid, '_', 1) - WHEN b_dadid = '' - THEN NULL - ELSE b_dadid - END CASE - , CASE WHEN (b_dadid IS NULL - OR b_dadid = '') - THEN NULL - WHEN split_part(b_dadid, '_', 2) = 'prelim' - THEN TRUE - ELSE FALSE - END CASE - , CASE WHEN b_dadid_publication_info = '' - AND (b_dadid IS NOT NULL - AND b_dadid <> '') - THEN 'Unknown' - WHEN b_dadid_publication_info = '' - AND (b_dadid IS NULL - OR b_dadid = '') - THEN NULL - ELSE b_dadid_publication_info - END CASE - , b_firstborn - , b_birthdate - , b_bdmin - , b_bdmax - , b_bddist - , b_entrydate - , b_entrytype - , b_departdate - -- Discard b_departdateerror - , b_departtype - FROM clean.biography; - - --- community_membership -> comm_membs -INSERT INTO comm_membs ( - animid - ,startdate - ,enddate - ,commid - ,startsource - ,endsource) - SELECT - cm_b_animid - , cm_start_date - , cm_end_date - , cm_cl_community_id - , cm_start_source - , cm_end_source - FROM community_membership - -- Get a consistent ID generation - ORDER BY cm_start_date, cm_end_date, cm_b_animid, cm_cl_community_id; - - --- biography_update_log -> biography_log -INSERT INTO biography_log ( - dateofupdate - ,animid - ,description - ,rationale - ,madeby) - SELECT - date_of_update - , chimp_id - , update_description - , update_rationale - , CASE -- This solves NULL data problems; problem #19 - WHEN made_by IS NULL - THEN 'UNK' - ELSE made_by - END CASE - FROM biography_update_log as log - -- Get a consistent id generation - ORDER BY date_of_update, chimp_id, update_rationale, made_by; - - --- community_membership_update_log -> comm_memb_log -INSERT INTO comm_memb_log ( - dateofupdate - ,animid - ,description - ,rationale - ,madeby) - SELECT - date_of_update - , chimp_id - , update_description - , update_rationale - , CASE -- This solves NULL data problems; problems #18, #16 - WHEN made_by IS NULL - THEN 'UNK' - ELSE made_by - END CASE - FROM clean.community_membership_update_log AS log - -- Get a consistent id generation - ORDER BY date_of_update, chimp_id, update_rationale, made_by; - - --- --- Follow stuff --- - --- Sanity checks -DO $$ -BEGIN --- Start of follow time is NULL -PERFORM * FROM clean.follow_arrival WHERE fa_time_start IS NULL; -IF FOUND THEN - RAISE data_exception USING - MESSAGE = 'There are follow_arrivals where fa_time_start is NULL'; -END IF; - --- End of follow time is NULL -PERFORM * FROM clean.follow_arrival WHERE fa_time_end IS NULL; -IF FOUND THEN - RAISE data_exception USING - MESSAGE = 'There are follow_arrivals where 
fa_time_end is NULL'; -END IF; - --- follow_arrival.fa_type_of_nesting has a valid value -PERFORM * - FROM clean.follow_arrival - WHERE fa_type_of_nesting NOT IN (0, 1, 2, 3); -IF FOUND THEN - RAISE data_exception USING - MESSAGE = 'There are follow_arrivals where fa_type_of_nesting is not ' - || '0, 1, 2, 3, 4'; -END IF; - --- follow.fol_flag_begin_in_nest is not 0 or 1 -PERFORM * - FROM clean.follow - WHERE fol_flag_begin_in_nest <> 0 AND fol_flag_begin_in_nest <> 1; -IF FOUND THEN - RAISE data_exception USING - MESSAGE = 'There are follows where fol_flag_begin_in_nest is not ' - || '0 or 1'; -END IF; - --- follow.fol_flag_end_in_nest is not 0 or 1 -PERFORM * - FROM clean.follow - WHERE fol_flag_end_in_nest <> 0 AND fol_flag_end_in_nest <> 1; -IF FOUND THEN - RAISE data_exception USING - MESSAGE = 'There are follows where fol_flag_end_in_nest is not ' - || '0 or 1'; -END IF; - -END; -$$; - --- --- follow -> FOLLOWS --- -INSERT INTO follows ( - focal -, community -, date -, notes) -SELECT fol_b_animid - , fol_cl_community_id - , fol_date - , '' - FROM clean.follow - ORDER BY fol_b_animid, fol_date; - --- --- follow -> FOLLOW_STUDIES --- --- Load all the new studies into STUDIES -INSERT INTO studies (study, description) - WITH new_studies AS ( - SELECT UPPER(follow.fol_study_code_1) AS study - FROM clean.follow - WHERE follow.fol_study_code_1 IS NOT NULL - GROUP BY UPPER(follow.fol_study_code_1) - UNION - SELECT UPPER(follow.fol_study_code_2) AS study - FROM clean.follow - WHERE follow.fol_study_code_2 IS NOT NULL - GROUP BY UPPER(follow.fol_study_code_2) - ) - SELECT new_studies.study, new_studies.study - FROM new_studies - GROUP BY new_studies.study - ON CONFLICT (study) - DO NOTHING; - --- follow.fol_study_code_1 -> FOLLOW_STUDIES -INSERT INTO follow_studies (fid, code) - SELECT - (SELECT follows.fid - FROM follows - WHERE follows.focal = follow.fol_b_animid - AND follows.date = follow.fol_date) - , UPPER(follow.fol_study_code_1) - FROM clean.follow - WHERE fol_study_code_1 IS NOT NULL; - --- follow.fol_study_code_2 -> FOLLOW_STUDIES -INSERT INTO follow_studies (fid, code) - SELECT - (SELECT follows.fid - FROM follows - WHERE follows.focal = follow.fol_b_animid - AND follows.date = follow.fol_date) - , UPPER(follow.fol_study_code_2) - FROM clean.follow - WHERE fol_study_code_2 IS NOT NULL; - - --- --- follow -> FOLLOW_OBSERVERS --- - --- AM rows -INSERT INTO follow_observers ( - fid -, period -, obs_brec -, obs_tiki) -WITH obs AS ( -SELECT follow.fol_b_animid - , follow.fol_date - , follow.fol_am_observer_1 AS am_obs1 - , follow.fol_am_observer_2 AS am_obs2 - , follow.fol_pm_observer_1 AS pm_obs1 - , follow.fol_pm_observer_2 AS pm_obs2 - FROM clean.follow -) -SELECT - (SELECT follows.fid - FROM follows - WHERE follows.focal = obs.fol_b_animid - AND follows.date = obs.fol_date) -, 'AM' -, COALESCE(obs.am_obs1, 'NONE') -- problem #38 -, COALESCE(obs.am_obs2, 'NONE') -- problem #38 - FROM obs - -- Don't try to load follows that haven't been converted - JOIN follows ON (follows.focal = obs.fol_b_animid - AND follows.date = obs.fol_date) - WHERE COALESCE(obs.am_obs1, obs.am_obs2) - IS NOT NULL; - --- PM rows -INSERT INTO follow_observers ( - fid -, period -, obs_brec -, obs_tiki) -WITH obs AS ( -SELECT upper( -- problem #31 - btrim( -- problem #32 - follow.fol_b_animid)) AS fol_b_animid - , follow.fol_date - , follow.fol_am_observer_1 AS am_obs1 - , follow.fol_am_observer_2 AS am_obs2 - , follow.fol_pm_observer_1 AS pm_obs1 - , follow.fol_pm_observer_2 AS pm_obs2 - FROM clean.follow -) 
-SELECT - (SELECT follows.fid - FROM follows - WHERE follows.focal = obs.fol_b_animid - AND follows.date = obs.fol_date) -, 'PM' -, COALESCE(obs.pm_obs1, 'NONE') -- problem #38 -, COALESCE(obs.pm_obs2, 'NONE') -- problem #38 - FROM obs - -- Don't try to load follows that haven't been converted - JOIN follows ON (follows.focal = obs.fol_b_animid - AND follows.date = obs.fol_date) - WHERE COALESCE(obs.pm_obs1, obs.pm_obs2) - IS NOT NULL; - --- The UNK time period -INSERT INTO follow_observers ( - fid -, period -, obs_brec -, obs_tiki) -SELECT - (SELECT follows.fid - FROM follows - WHERE follows.focal = follow.fol_b_animid - AND follows.date = follow.fol_date) -, 'UNK' -, 'NONE' -, 'NONE' - FROM clean.follow - WHERE - -- Problem #37 - (fol_date = '1974-01-10' AND fol_b_animid = 'SW') - OR (fol_date = '1974-01-18' AND fol_b_animid = 'PL') - OR (fol_date = '1974-08-13' AND fol_b_animid = 'PM') - OR (fol_date = '1974-12-18' AND fol_b_animid = 'HG') - OR (fol_date = '1975-01-08' AND fol_b_animid = 'NV') - OR (fol_date = '1975-01-15' AND fol_b_animid = 'ST') - OR (fol_date = '1975-01-16' AND fol_b_animid = 'FB') - OR (fol_date = '1975-02-05' AND fol_b_animid = 'GB') - OR (fol_date = '1975-04-20' AND fol_b_animid = 'LB') - OR (fol_date = '1977-08-03' AND fol_b_animid = 'ML') - OR (fol_date = '1977-09-21' AND fol_b_animid = 'ML') - OR (fol_date = '1977-12-02' AND fol_b_animid = 'HR') - OR (fol_date = '1978-04-14' AND fol_b_animid = 'SH') - OR (fol_date = '1978-05-16' AND fol_b_animid = 'HM') - OR (fol_date = '1978-06-05' AND fol_b_animid = 'FG') - OR (fol_date = '1978-06-28' AND fol_b_animid = 'MU') - OR (fol_date = '1978-07-05' AND fol_b_animid = 'FF') - OR (fol_date = '1978-07-06' AND fol_b_animid = 'FG') - OR (fol_date = '1978-07-10' AND fol_b_animid = 'GB') - OR (fol_date = '1978-07-24' AND fol_b_animid = 'GB') - OR (fol_date = '1978-08-11' AND fol_b_animid = 'GB') - OR (fol_date = '1978-12-30' AND fol_b_animid = 'EV') - OR (fol_date = '1979-03-02' AND fol_b_animid = 'HR') - OR (fol_date = '1979-03-14' AND fol_b_animid = 'FG') - OR (fol_date = '1979-04-14' AND fol_b_animid = 'FG') - OR (fol_date = '1979-04-26' AND fol_b_animid = 'SH') - OR (fol_date = '1980-10-27' AND fol_b_animid = 'ML') - OR (fol_date = '1980-10-31' AND fol_b_animid = 'ML') - OR (fol_date = '1981-07-22' AND fol_b_animid = 'MF') - OR (fol_date = '1981-07-26' AND fol_b_animid = 'MU') - OR (fol_date = '1981-07-27' AND fol_b_animid = 'MF') - OR (fol_date = '1981-08-17' AND fol_b_animid = 'MF') - OR (fol_date = '1981-08-18' AND fol_b_animid = 'MF') - OR (fol_date = '1982-02-09' AND fol_b_animid = 'EV') - OR (fol_date = '1982-08-13' AND fol_b_animid = 'ML') - OR (fol_date = '1984-02-08' AND fol_b_animid = 'FF') - OR (fol_date = '1984-05-27' AND fol_b_animid = 'GB') - OR (fol_date = '1984-05-28' AND fol_b_animid = 'GM') - OR (fol_date = '1984-06-06' AND fol_b_animid = 'ST') - OR (fol_date = '1984-08-28' AND fol_b_animid = 'GM') - OR (fol_date = '1986-03-20' AND fol_b_animid = 'FD') - OR (fol_date = '1987-09-19' AND fol_b_animid = 'PI') - OR (fol_date = '1988-07-21' AND fol_b_animid = 'ME') - OR (fol_date = '1988-09-06' AND fol_b_animid = 'GB') - OR (fol_date = '1989-01-09' AND fol_b_animid = 'AL') - OR (fol_date = '1993-04-01' AND fol_b_animid = 'PF') - OR (fol_date = '1995-05-19' AND fol_b_animid = 'TB') - OR (fol_date = '1998-07-13' AND fol_b_animid = 'WL') - OR (fol_date = '1998-09-28' AND fol_b_animid = 'SL') - OR (fol_date = '1999-01-03' AND fol_b_animid = 'FF') - OR (fol_date = '2012-08-27' AND fol_b_animid = 'KON') - OR 
(fol_date = '2014-10-27' AND fol_b_animid = 'WEM') - OR (fol_date = '2014-11-23' AND fol_b_animid = 'WEM') - OR (fol_date = '2014-12-23' AND fol_b_animid = 'EZA') - OR (fol_date = '2015-01-05' AND fol_b_animid = 'VAN') - OR (fol_date = '2015-01-17' AND fol_b_animid = 'SI'); - - --- --- follow_arrival --- - --- The processing of follow_arrival is done row-by-row. --- This makes it possible to associate the correct events --- with the correct roles, etc., which might not be possible --- if, say, events are created without roles and then there --- are multiple arriving chimps that each need to be related --- to "their own" event. In any case, we are optimizing for --- clarity and accuracy, not speed, so this approach works --- best. - -DO $$ -DECLARE - this_fa clean.follow_arrival%ROWTYPE; - related_fid follows.fid%TYPE; - -BEGIN - - FOR this_fa IN - SELECT * - FROM clean.follow_arrival - WHERE EXISTS -- Problem #43 - (SELECT 1 - FROM follows - WHERE follows.focal = follow_arrival.fa_fol_b_focal_animid - AND follows.date = follow_arrival.fa_fol_date) - ORDER BY follow_arrival.fa_fol_date - , follow_arrival.fa_fol_b_focal_animid - , follow_arrival.fa_b_arr_animid - LOOP - -- Which follow belongs to the follow_arrival? - SELECT follows.fid - INTO STRICT related_fid - FROM follows - WHERE follows.focal = this_fa.fa_fol_b_focal_animid - AND follows.date = this_fa.fa_fol_date; - - -- follow_arrival -> EVENTS - INSERT INTO events ( - fid - , behavior - , start - , stop - , certainty - , notes) - VALUES ( - related_fid - , 'ARR' - , this_fa.fa_time_start - , this_fa.fa_time_end - , this_fa.fa_type_of_certainty - , ''); - - -- follow_arrival -> ROLES - INSERT INTO roles ( - eid - , role - , participant) - VALUES ( - currval('events_eid_seq') - , 'Solo' - , this_fa.fa_b_arr_animid); - - -- follow_arrival -> ARRIVALS - INSERT INTO arrivals ( - eid - , neststart - , nestend - , cycle - , datasource) - VALUES ( - currval('events_eid_seq') - , CASE - WHEN this_fa.fa_type_of_nesting IS NOT DISTINCT FROM 0 - THEN FALSE - WHEN this_fa.fa_type_of_nesting IS NOT DISTINCT FROM 1 - THEN TRUE - WHEN this_fa.fa_type_of_nesting IS NOT DISTINCT FROM 2 - THEN FALSE - WHEN this_fa.fa_type_of_nesting IS NOT DISTINCT FROM 3 - THEN TRUE - END - , CASE - WHEN this_fa.fa_type_of_nesting IS NOT DISTINCT FROM 0 - THEN FALSE - WHEN this_fa.fa_type_of_nesting IS NOT DISTINCT FROM 1 - THEN FALSE - WHEN this_fa.fa_type_of_nesting IS NOT DISTINCT FROM 2 - THEN TRUE - WHEN this_fa.fa_type_of_nesting IS NOT DISTINCT FROM 3 - THEN TRUE - END - , this_fa.fa_type_of_cycle - , this_fa.fa_data_source); - - END LOOP; -END; -$$; -- follow_arrival DO block - --- --- Finish --- - --- Make the unknown person unusable. -UPDATE people - SET active = FALSE - WHERE person = 'UNK' - OR person = 'NONE' - OR person LIKE '%/%'; -- problem #39 diff --git a/conversion/load_finish.sql b/conversion/load_finish.sql new file mode 100644 index 0000000..658c737 --- /dev/null +++ b/conversion/load_finish.sql @@ -0,0 +1,32 @@ +-- Copyright (C) 2023, 2024, 2025 The Meme Factory, Inc. +-- http://www.karlpinc.com/ +-- +-- This program is free software: you can redistribute it and/or modify +-- it under the terms of the GNU Affero General Public License as +-- published by the Free Software Foundation, either version 3 of the +-- License, or (at your option) any later version. 
+-- +-- This program is distributed in the hope that it will be useful, +-- but WITHOUT ANY WARRANTY; without even the implied warranty of +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-- GNU Affero General Public License for more details. +-- +-- You should have received a copy of the GNU Affero General Public License +-- along with this program. If not, see . +-- +-- Karl O. Pinc + +-- Setup the search path +SET search_path TO sokwedb, codes, lib, clean, libconv; + + +-- +-- Finish +-- + +-- Make the unknown person unusable. +UPDATE people + SET active = FALSE + WHERE person = 'UNK' + OR person = 'NONE' + OR person LIKE '%/%'; -- problem #39 diff --git a/conversion/load_follow_arrival.sql b/conversion/load_follow_arrival.sql new file mode 100644 index 0000000..7a8f78f --- /dev/null +++ b/conversion/load_follow_arrival.sql @@ -0,0 +1,198 @@ +-- Copyright (C) 2023, 2024, 2025 The Meme Factory, Inc. +-- http://www.karlpinc.com/ +-- +-- This program is free software: you can redistribute it and/or modify +-- it under the terms of the GNU Affero General Public License as +-- published by the Free Software Foundation, either version 3 of the +-- License, or (at your option) any later version. +-- +-- This program is distributed in the hope that it will be useful, +-- but WITHOUT ANY WARRANTY; without even the implied warranty of +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-- GNU Affero General Public License for more details. +-- +-- You should have received a copy of the GNU Affero General Public License +-- along with this program. If not, see . +-- +-- Karl O. Pinc + +-- Setup the search path +SET search_path TO sokwedb, codes, lib, clean, libconv; + + +-- +-- Follow_arrival +-- + +-- Sanity checks +DO $$ +BEGIN +-- Start of follow time is NULL +PERFORM * FROM clean.follow_arrival WHERE fa_time_start IS NULL; +IF FOUND THEN + RAISE data_exception USING + MESSAGE = 'There are follow_arrivals where fa_time_start is NULL'; +END IF; + +-- End of follow time is NULL +PERFORM * FROM clean.follow_arrival WHERE fa_time_end IS NULL; +IF FOUND THEN + RAISE data_exception USING + MESSAGE = 'There are follow_arrivals where fa_time_end is NULL'; +END IF; + +-- follow_arrival.fa_type_of_nesting has a valid value +PERFORM * + FROM clean.follow_arrival + WHERE (fa_type_of_nesting NOT IN (0, 1, 2, 3) + OR fa_type_of_nesting IS NULL); +IF FOUND THEN + RAISE data_exception USING + MESSAGE = 'There are follow_arrivals where fa_type_of_nesting is not ' + || '0, 1, 2, 3, 4'; +END IF; + +-- follow.fol_flag_begin_in_nest is not 0 or 1 +PERFORM * + FROM clean.follow + WHERE fol_flag_begin_in_nest <> 0 AND fol_flag_begin_in_nest <> 1; +IF FOUND THEN + RAISE data_exception USING + MESSAGE = 'There are follows where fol_flag_begin_in_nest is not ' + || '0 or 1'; +END IF; + +-- follow.fol_flag_end_in_nest is not 0 or 1 +PERFORM * + FROM clean.follow + WHERE fol_flag_end_in_nest <> 0 AND fol_flag_end_in_nest <> 1; +IF FOUND THEN + RAISE data_exception USING + MESSAGE = 'There are follows where fol_flag_end_in_nest is not ' + || '0 or 1'; +END IF; + +END; +$$; + + +-- +-- follow_arrival +-- + +-- The processing of follow_arrival is done row-by-row. +-- This makes it possible to associate the correct events +-- with the correct roles, etc., which might not be possible +-- if, say, events are created without roles and then there +-- are multiple arriving chimps that each need to be related +-- to "their own" event. 
In any case, we are optimizing for +-- clarity and accuracy, not speed, so this approach works +-- best. + +DO $$ +DECLARE + this_fa clean.follow_arrival%ROWTYPE; + related_fid follows.fid%TYPE; + +BEGIN + + FOR this_fa IN + SELECT * + FROM clean.follow_arrival + ORDER BY follow_arrival.fa_fol_date + , follow_arrival.fa_fol_b_focal_animid + , follow_arrival.fa_b_arr_animid + LOOP + -- Which follow belongs to the follow_arrival? + SELECT follows.fid + INTO STRICT related_fid + FROM follows + WHERE follows.focal = this_fa.fa_fol_b_focal_animid + AND follows.date = this_fa.fa_fol_date; + + -- follow_arrival -> EVENTS + INSERT INTO events ( + fid + , behavior + , start + , stop + , certainty + , notes) + VALUES ( + related_fid + , 'ARR' + , this_fa.fa_time_start + , this_fa.fa_time_end + , this_fa.fa_type_of_certainty + , ''); + + -- follow_arrival -> ROLES + INSERT INTO roles ( + eid + , role + , participant) + VALUES ( + currval('events_eid_seq') + , 'Solo' + , this_fa.fa_b_arr_animid); + + -- follow_arrival -> ARRIVALS + INSERT INTO arrivals ( + eid + , neststart + , nestend + , cycle + , datasource) + VALUES ( + currval('events_eid_seq') + , CASE + WHEN this_fa.fa_type_of_nesting IS NOT DISTINCT FROM 0 + THEN FALSE + WHEN this_fa.fa_type_of_nesting IS NOT DISTINCT FROM 1 + THEN TRUE + WHEN this_fa.fa_type_of_nesting IS NOT DISTINCT FROM 2 + THEN FALSE + WHEN this_fa.fa_type_of_nesting IS NOT DISTINCT FROM 3 + THEN TRUE + END + , CASE + WHEN this_fa.fa_type_of_nesting IS NOT DISTINCT FROM 0 + THEN FALSE + WHEN this_fa.fa_type_of_nesting IS NOT DISTINCT FROM 1 + THEN FALSE + WHEN this_fa.fa_type_of_nesting IS NOT DISTINCT FROM 2 + THEN TRUE + WHEN this_fa.fa_type_of_nesting IS NOT DISTINCT FROM 3 + THEN TRUE + END + , this_fa.fa_type_of_cycle + , this_fa.fa_data_source); + + END LOOP; +END; +$$; -- follow_arrival DO block + +-- Sanity checks +SELECT 'Sanity checks commented out!'; +DO $$ +BEGIN +if false then +-- Start of follow time is NULL +PERFORM * + FROM clean.follow_arrival + WHERE NOT EXISTS + (SELECT follows.fid + FROM follows + WHERE follows.focal = follow_arrival.fa_fol_b_focal_animid + AND follows.date = follow_arrival.fa_fol_date); +IF FOUND THEN + RAISE data_exception USING + MESSAGE = 'There are follow_arrivals where there is no follow'; +END IF; +end if; +END; +$$; + +ANALYZE events + , roles + , arrivals; diff --git a/conversion/load_follow_to_follows.sql b/conversion/load_follow_to_follows.sql new file mode 100644 index 0000000..996b2f0 --- /dev/null +++ b/conversion/load_follow_to_follows.sql @@ -0,0 +1,155 @@ +-- Copyright (C) 2023, 2024, 2025 The Meme Factory, Inc. +-- http://www.karlpinc.com/ +-- +-- This program is free software: you can redistribute it and/or modify +-- it under the terms of the GNU Affero General Public License as +-- published by the Free Software Foundation, either version 3 of the +-- License, or (at your option) any later version. +-- +-- This program is distributed in the hope that it will be useful, +-- but WITHOUT ANY WARRANTY; without even the implied warranty of +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-- GNU Affero General Public License for more details. +-- +-- You should have received a copy of the GNU Affero General Public License +-- along with this program. If not, see . +-- +-- Karl O. Pinc +-- +-- Remarks: +-- This does both FOLLOWS and FOLLOW_OBSERVERS because a FOLLOW_OBSERVERS +-- row is required for every FOLLOWS row. 
+ +-- Setup the search path +SET search_path TO sokwedb, codes, lib, clean, libconv; + + +-- follow -> follows + +-- Sanity checks +DO $$ +BEGIN +-- follow.fol_flag_begin_in_nest is not 0 or 1 +PERFORM * + FROM clean.follow + WHERE fol_flag_begin_in_nest <> 0 AND fol_flag_begin_in_nest <> 1; +IF FOUND THEN + RAISE data_exception USING + MESSAGE = 'There are follows where fol_flag_begin_in_nest is not ' + || '0 or 1'; +END IF; + +-- follow.fol_flag_end_in_nest is not 0 or 1 +PERFORM * + FROM clean.follow + WHERE fol_flag_end_in_nest <> 0 AND fol_flag_end_in_nest <> 1; +IF FOUND THEN + RAISE data_exception USING + MESSAGE = 'There are follows where fol_flag_end_in_nest is not ' + || '0 or 1'; +END IF; + +END; +$$; + +-- +-- follow -> FOLLOWS +-- +INSERT INTO follows ( + focal +, community +, date +, notes) +SELECT fol_b_animid + , fol_cl_community_id + , fol_date + , '' + FROM clean.follow + ORDER BY fol_b_animid, fol_date; + + +-- +-- follow -> FOLLOW_OBSERVERS +-- + +-- AM rows +INSERT INTO follow_observers ( + fid +, period +, obs_brec +, obs_tiki) +WITH obs AS ( +SELECT follow.fol_b_animid + , follow.fol_date + , follow.fol_am_observer_1 AS am_obs1 + , follow.fol_am_observer_2 AS am_obs2 + , follow.fol_pm_observer_1 AS pm_obs1 + , follow.fol_pm_observer_2 AS pm_obs2 + FROM clean.follow +) +SELECT + (SELECT follows.fid + FROM follows + WHERE follows.focal = obs.fol_b_animid + AND follows.date = obs.fol_date) +, 'AM' +, COALESCE(obs.am_obs1, 'NONE') -- problem #38 +, COALESCE(obs.am_obs2, 'NONE') -- problem #38 + FROM obs + -- Don't try to load follows that haven't been converted + JOIN follows ON (follows.focal = obs.fol_b_animid + AND follows.date = obs.fol_date) + WHERE COALESCE(obs.am_obs1, obs.am_obs2) + IS NOT NULL; + +-- PM rows +INSERT INTO follow_observers ( + fid +, period +, obs_brec +, obs_tiki) +WITH obs AS ( +SELECT upper( -- problem #31 + btrim( -- problem #32 + follow.fol_b_animid)) AS fol_b_animid + , follow.fol_date + , follow.fol_am_observer_1 AS am_obs1 + , follow.fol_am_observer_2 AS am_obs2 + , follow.fol_pm_observer_1 AS pm_obs1 + , follow.fol_pm_observer_2 AS pm_obs2 + FROM clean.follow +) +SELECT + (SELECT follows.fid + FROM follows + WHERE follows.focal = obs.fol_b_animid + AND follows.date = obs.fol_date) +, 'PM' +, COALESCE(obs.pm_obs1, 'NONE') -- problem #38 +, COALESCE(obs.pm_obs2, 'NONE') -- problem #38 + FROM obs + -- Don't try to load follows that haven't been converted + JOIN follows ON (follows.focal = obs.fol_b_animid + AND follows.date = obs.fol_date) + WHERE COALESCE(obs.pm_obs1, obs.pm_obs2) + IS NOT NULL; + +-- The UNK time period +INSERT INTO follow_observers ( + fid +, period +, obs_brec +, obs_tiki) +SELECT + (SELECT follows.fid + FROM follows + WHERE follows.focal = follow.fol_b_animid + AND follows.date = follow.fol_date) +, 'UNK' +, 'NONE' +, 'NONE' + FROM clean.follow; + + +ANALYZE follows + , follow_observers; diff --git a/conversion/load_follow_to_other.sql b/conversion/load_follow_to_other.sql new file mode 100644 index 0000000..b0a293e --- /dev/null +++ b/conversion/load_follow_to_other.sql @@ -0,0 +1,68 @@ +-- Copyright (C) 2023, 2024, 2025 The Meme Factory, Inc. +-- http://www.karlpinc.com/ +-- +-- This program is free software: you can redistribute it and/or modify +-- it under the terms of the GNU Affero General Public License as +-- published by the Free Software Foundation, either version 3 of the +-- License, or (at your option) any later version. 
+--
+-- This program is distributed in the hope that it will be useful,
+-- but WITHOUT ANY WARRANTY; without even the implied warranty of
+-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+-- GNU Affero General Public License for more details.
+--
+-- You should have received a copy of the GNU Affero General Public License
+-- along with this program. If not, see <https://www.gnu.org/licenses/>.
+--
+-- Karl O. Pinc
+
+-- Setup the search path
+SET search_path TO sokwedb, codes, lib, clean, libconv;
+
+
+--
+-- follow -> FOLLOW_STUDIES
+--
+-- Load all the new studies into STUDIES
+INSERT INTO studies (study, description)
+  WITH new_studies AS (
+    SELECT UPPER(follow.fol_study_code_1) AS study
+      FROM clean.follow
+      WHERE follow.fol_study_code_1 IS NOT NULL
+      GROUP BY UPPER(follow.fol_study_code_1)
+    UNION
+    SELECT UPPER(follow.fol_study_code_2) AS study
+      FROM clean.follow
+      WHERE follow.fol_study_code_2 IS NOT NULL
+      GROUP BY UPPER(follow.fol_study_code_2)
+  )
+  SELECT new_studies.study, new_studies.study
+    FROM new_studies
+    GROUP BY new_studies.study
+  ON CONFLICT (study)
+  DO NOTHING;
+
+-- follow.fol_study_code_1 -> FOLLOW_STUDIES
+INSERT INTO follow_studies (fid, code)
+  SELECT
+      (SELECT follows.fid
+         FROM follows
+         WHERE follows.focal = follow.fol_b_animid
+               AND follows.date = follow.fol_date)
+    , UPPER(follow.fol_study_code_1)
+    FROM clean.follow
+    WHERE fol_study_code_1 IS NOT NULL;
+
+-- follow.fol_study_code_2 -> FOLLOW_STUDIES
+INSERT INTO follow_studies (fid, code)
+  SELECT
+      (SELECT follows.fid
+         FROM follows
+         WHERE follows.focal = follow.fol_b_animid
+               AND follows.date = follow.fol_date)
+    , UPPER(follow.fol_study_code_2)
+    FROM clean.follow
+    WHERE fol_study_code_2 IS NOT NULL;
+
+ANALYZE studies
+      , follow_studies;
diff --git a/conversion/load_support.sql b/conversion/load_support.sql
new file mode 100644
index 0000000..5331232
--- /dev/null
+++ b/conversion/load_support.sql
@@ -0,0 +1,131 @@
+-- Copyright (C) 2023, 2024, 2025 The Meme Factory, Inc.
+-- http://www.karlpinc.com/
+--
+-- This program is free software: you can redistribute it and/or modify
+-- it under the terms of the GNU Affero General Public License as
+-- published by the Free Software Foundation, either version 3 of the
+-- License, or (at your option) any later version.
+--
+-- This program is distributed in the hope that it will be useful,
+-- but WITHOUT ANY WARRANTY; without even the implied warranty of
+-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+-- GNU Affero General Public License for more details.
+--
+-- You should have received a copy of the GNU Affero General Public License
+-- along with this program. If not, see <https://www.gnu.org/licenses/>.
+--
+-- Karl O. Pinc
+
+-- Setup the search path
+SET search_path TO sokwedb, codes, lib, clean, libconv;
+
+
+-- Support tables
+
+-- arrival_sources
+-- From the follow_arrival part of the MS Word doc for the MS Access db
+INSERT INTO arrival_sources (code, description)
+  VALUES ('Tiki', 'Data came from Tiki (either typed or written in)')
+       , ('Tiki_Mom'
+          , 'Juvenile was assumed to be with their mother.'
+            || ' Duplicated mother’s arrival for juvenile.')
+       , ('Tiki_ID'
+          , 'Juvenile was present with another chimp (e.g. sister).'
+            || ' Used that chimp’s tiki arrival for juvenile.')
+       , ('Brec'
+          , 'Juvenile arrival reconstructed from Brec notes.');
+
+-- certainties
+-- From the MS Word chimp document
+INSERT INTO certainties (certainty, description)
+  VALUES ('0', 'Chimp suspected to be nearby (maybe hidden for the moment)')
+       , ('1', 'Chimp definitely seen');
+
+
+-- comm_membs_sources
+-- Obtain these from community_membership
+INSERT INTO comm_membs_sources (commmembssource, description)
+  WITH sources AS (
+    SELECT cm_start_source AS source
+      FROM clean.community_membership
+      GROUP BY cm_start_source
+    UNION
+    SELECT cm_end_source AS source
+      FROM clean.community_membership
+      GROUP BY cm_end_source)
+  SELECT source, source
+    FROM sources
+    GROUP BY source;
+
+-- community_lookup -> comm_ids
+INSERT INTO comm_ids (commid, name, notes, membcriteria)
+  SELECT community_lookup.cl_community_id
+       , community_lookup.cl_community_name
+       , community_lookup.cl_details
+       , ''
+    FROM community_lookup;
+-- Problem #12 -- hacked here
+insert into comm_ids (commid, name, notes, membcriteria)
+  values ('KL', 'Kalande', '', '')
+       , ('KL_KK', 'Kasekela/kalande', '', '');
+
+-- cycle_states
+INSERT INTO cycle_states (code, description)
+  VALUES ('0', 'Not swollen')
+       , ('0.25', '1/4 swollen')
+       , ('0.5', '1/2 swollen')
+       , ('0.75', '3/4 swollen')
+       , ('1', 'Fully swollen')
+       , ('U', 'Adolescent swelling')
+       , ('n/a', 'Male')
+       , ('MISS', 'Missing data');  -- problem #42
+
+-- departtypes
+INSERT INTO departtypes (departtype, description)
+  VALUES ('D', 'Death')
+       , ('E', 'Emigration')
+       , ('P', 'Permanent disappearance')
+       , ('O', 'End of observation; Present in the most recent census')
+       , ('X', 'Used for unknown individuals');
+
+show search_path;
+-- entrytypes
+INSERT INTO entrytypes (entrytype, description)
+  VALUES ('B', 'Birth')
+       , ('I', 'Immigration')
+       , ('C', 'Start of confirmed AnimID')
+       , ('O', 'Initiation of close observation')
+       , ('X', 'Used for unknown individuals');
+
+-- people
+-- Copied from clean.people, which we manually constructed.
+INSERT INTO people (person, name, description, active)
+  SELECT person, name, description, TRUE
+    FROM clean.people;
+
+
+-- obs_periods
+INSERT INTO obs_periods (period, description)
+  VALUES ('AM', 'Morning')
+       , ('PM', 'Afternoon')
+       , ('UNK', 'Unknown');
+
+-- role_codes
+INSERT INTO role_codes (role, description)
+  VALUES ('Actor', 'The initiator in a directed dyadic interaction')
+       , ('Actee', 'The recipient in a directed dyadic interaction')
+       , ('Solo', 'A participant in an event, non-directed when there are multiple participants');
+
+
+-- It'd be nice to analyze the whole db, but Azure gives us errors.
+-- Instead, be explicit and analyze the tables loaded.
+ANALYZE arrival_sources
+      , certainties
+      , comm_membs_sources
+      , comm_ids
+      , cycle_states
+      , departtypes
+      , entrytypes
+      , people
+      , obs_periods
+      , role_codes;
-- 
2.34.1
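
Editor's note: each of the new load_*.sql files above follows the same per-connection
pattern that the Makefile comment describes: set the search path, copy rows from the
clean schema, and end the connection's work with an ANALYZE of the freshly loaded
tables.  The sketch below is a minimal illustration of one such stage under that
pattern; the example_data target table, the clean.example source, and the column
names are hypothetical stand-ins, not objects from this patch.

-- A minimal sketch of one load stage, run in its own connection, e.g.:
--   psql --single-transaction --set=ON_ERROR_STOP=y -f load_example.sql
SET search_path TO sokwedb, codes, lib, clean, libconv;

-- Copy this stage's rows out of the clean schema.
INSERT INTO example_data (id, value)
  SELECT e_id, e_value
    FROM clean.example;

-- Finish the connection with ANALYZE so that later connections plan
-- against real statistics instead of empty-table defaults.
ANALYZE example_data;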