From b911a767f835fb3dd13854fe511e8c44f9c58dcb Mon Sep 17 00:00:00 2001 From: "Karl O. Pinc kop@karlpinc.com" Date: Mon, 6 Oct 2025 21:39:23 +0000 Subject: [PATCH] Use mixed-case people where possible; conversion problem #40 Create the people table in the clean schema and update the follow table's observer columns in that schema, to make things easy in load_data script. --- conversion/clean.sql | 140 +++++++++++++++++++++++++++++++++++++-- conversion/load_data.sql | 86 +++--------------------- 2 files changed, 144 insertions(+), 82 deletions(-) diff --git a/conversion/clean.sql b/conversion/clean.sql index b494b28..3230d4b 100644 --- a/conversion/clean.sql +++ b/conversion/clean.sql @@ -22,9 +22,7 @@ CREATE TABLE people ( person TEXT PRIMARY KEY NOT NULL CONSTRAINT "Person: Cannot be empty or nothing but spaces" - CHECK(trim(from Person) <> '') - CONSTRAINT "Person: Cannot contain any spaces" - CHECK(Person IS NULL OR POSITION(' ' IN Person) = 0) + CHECK(Person IS NOT NULL AND TRIM(FROM Person) <> '') ,name TEXT NOT NULL CONSTRAINT "Name: Cannot be empty or nothing but spaces" CHECK(trim(from Name) <> '') @@ -45,7 +43,68 @@ INSERT INTO people (person, name, description) , ('ICG', 'ICG', '') , ('UNK', 'Unknown', 'The unknown person') , ('NONE', 'No person' - , 'Used when a person is required, but nobody filled the role'); + , 'Used when a person is required, but nobody filled the role.' + || ' Used when there was no data in the MS Access DB.'); + +-- Load all the observers from "follow" into PEOPLE +-- Convert to mixed-case +-- Conversion problem #40 +INSERT INTO people (person, name, description) + WITH new_people AS ( + SELECT BTRIM(follow.fol_am_observer_1) AS person + FROM clean.follow + WHERE follow.fol_am_observer_1 IS NOT NULL + GROUP BY BTRIM(follow.fol_am_observer_1) + UNION + SELECT BTRIM(follow.fol_am_observer_2) AS person + FROM clean.follow + WHERE follow.fol_am_observer_2 IS NOT NULL + GROUP BY BTRIM(follow.fol_am_observer_2) + UNION + SELECT BTRIM(follow.fol_pm_observer_1) AS person + FROM clean.follow + WHERE follow.fol_pm_observer_1 IS NOT NULL + GROUP BY BTRIM(follow.fol_pm_observer_1) + UNION + SELECT BTRIM(follow.fol_pm_observer_2) AS person + FROM clean.follow + WHERE follow.fol_pm_observer_2 IS NOT NULL + GROUP BY BTRIM(follow.fol_pm_observer_2) + ) + , uniq_people AS ( + SELECT new_people.person + FROM new_people + WHERE new_people.person <> '' + -- special case, we want "NONE" + AND LOWER(BTRIM(new_people.person)) <> 'none' + GROUP BY new_people.person + ) + , mixed_people AS ( + SELECT CASE + WHEN LOWER(uniq_people.person) = uniq_people.person + OR UPPER(uniq_people.person) = uniq_people.person + THEN COALESCE( + (SELECT up.person + FROM uniq_people AS up + WHERE up.person <> LOWER(up.person) + AND up.person <> UPPER(up.person) + AND LOWER(up.person) = LOWER(uniq_people.person) + ORDER BY up.person + LIMIT 1) + , uniq_people.person) + ELSE + uniq_people.person + END AS person + FROM uniq_people + ) + , justone AS ( + SELECT first_value(person) OVER (PARTITION BY LOWER(person)) AS person + FROM mixed_people + ORDER BY LOWER(person), person) + SELECT justone.person, justone.person, '' + FROM justone + ON CONFLICT (person) + DO NOTHING; -- -- Clean the unknown individuals @@ -79,3 +138,76 @@ UPDATE biography UPDATE biography SET b_sex = 'M' WHERE b_animid = 'UNK'; + +-- Update the follow table to use the mixed-case version of +-- the observer code. +-- Conversion problem #40 + +-- Do this the easy way +UPDATE follow + SET fol_am_observer_1 = BTRIM(fol_am_observer_1) + , fol_am_observer_2 = BTRIM(fol_am_observer_2) + , fol_pm_observer_1 = BTRIM(fol_pm_observer_1) + , fol_pm_observer_2 = BTRIM(fol_pm_observer_2); + +UPDATE follow + SET fol_am_observer_1 = CASE + WHEN fol_am_observer_1 IS NOT DISTINCT FROM '' + THEN NULL + ELSE fol_am_observer_1 + END + , fol_am_observer_2 = CASE + WHEN fol_am_observer_2 IS NOT DISTINCT FROM '' + THEN NULL + ELSE fol_am_observer_2 + END + , fol_pm_observer_1 = CASE + WHEN fol_pm_observer_1 IS NOT DISTINCT FROM '' + THEN NULL + ELSE fol_pm_observer_1 + END + , fol_pm_observer_2 = CASE + WHEN fol_pm_observer_2 IS NOT DISTINCT FROM '' + THEN NULL + ELSE fol_pm_observer_2 + END; + +UPDATE follow + SET fol_am_observer_1 = + (SELECT person + FROM people + WHERE LOWER(person) = LOWER(BTRIM(fol_am_observer_1))) + WHERE fol_am_observer_1 IS NOT NULL + AND NOT EXISTS (SELECT 1 + FROM people + WHERE people.person = fol_am_observer_1); + +UPDATE follow + SET fol_am_observer_2 = + (SELECT person + FROM people + WHERE LOWER(person) = LOWER(BTRIM(fol_am_observer_2))) + WHERE fol_am_observer_2 IS NOT NULL + AND NOT EXISTS (SELECT 1 + FROM people + WHERE people.person = fol_am_observer_2); + +UPDATE follow + SET fol_pm_observer_1 = + (SELECT person + FROM people + WHERE LOWER(person) = LOWER(BTRIM(fol_pm_observer_1))) + WHERE fol_pm_observer_1 IS NOT NULL + AND NOT EXISTS (SELECT 1 + FROM people + WHERE people.person = fol_pm_observer_1); + +UPDATE follow + SET fol_pm_observer_2 = + (SELECT person + FROM people + WHERE LOWER(person) = LOWER(BTRIM(fol_pm_observer_2))) + WHERE fol_pm_observer_2 IS NOT NULL + AND NOT EXISTS (SELECT 1 + FROM people + WHERE people.person = fol_pm_observer_2); diff --git a/conversion/load_data.sql b/conversion/load_data.sql index da31b86..82e649c 100644 --- a/conversion/load_data.sql +++ b/conversion/load_data.sql @@ -330,36 +330,6 @@ INSERT INTO follow_studies (fid, code) -- follow -> FOLLOW_OBSERVERS -- --- Load all the new people into PEOPLE -INSERT INTO people (person, name, description, active) - WITH new_people AS ( - SELECT BTRIM(follow.fol_am_observer_1) AS person - FROM clean.follow - WHERE follow.fol_am_observer_1 IS NOT NULL - GROUP BY BTRIM(follow.fol_am_observer_1) - UNION - SELECT BTRIM(follow.fol_am_observer_2) AS person - FROM clean.follow - WHERE follow.fol_am_observer_2 IS NOT NULL - GROUP BY BTRIM(follow.fol_am_observer_2) - UNION - SELECT BTRIM(follow.fol_pm_observer_1) AS person - FROM clean.follow - WHERE follow.fol_pm_observer_1 IS NOT NULL - GROUP BY BTRIM(follow.fol_pm_observer_1) - UNION - SELECT BTRIM(follow.fol_pm_observer_2) AS person - FROM clean.follow - WHERE follow.fol_pm_observer_2 IS NOT NULL - GROUP BY BTRIM(follow.fol_pm_observer_2) - ) - SELECT new_people.person, new_people.person, '', TRUE - FROM new_people - WHERE new_people.person <> '' - GROUP BY new_people.person - ON CONFLICT (person) - DO NOTHING; - -- AM rows INSERT INTO follow_observers ( fid @@ -371,30 +341,10 @@ SELECT upper( -- problem #31 btrim( -- problem #32 follow.fol_b_animid)) AS fol_b_animid , follow.fol_date - , CASE - WHEN BTRIM(follow.fol_am_observer_1) IS NOT DISTINCT FROM '' - THEN NULL - ELSE - BTRIM(follow.fol_am_observer_1) - END AS am_obs1 - , CASE - WHEN BTRIM(follow.fol_am_observer_2) IS NOT DISTINCT FROM '' - THEN NULL - ELSE - BTRIM(follow.fol_am_observer_2) - END AS am_obs2 - , CASE - WHEN BTRIM(follow.fol_pm_observer_1) IS NOT DISTINCT FROM '' - THEN NULL - ELSE - BTRIM(follow.fol_pm_observer_1) - END AS pm_obs1 - , CASE - WHEN BTRIM(follow.fol_pm_observer_2) IS NOT DISTINCT FROM '' - THEN NULL - ELSE - BTRIM(follow.fol_pm_observer_2) - END AS pm_obs2 + , follow.fol_am_observer_1 AS am_obs1 + , follow.fol_am_observer_2 AS am_obs2 + , follow.fol_pm_observer_1 AS pm_obs1 + , follow.fol_pm_observer_2 AS pm_obs2 FROM clean.follow ) SELECT @@ -423,30 +373,10 @@ SELECT upper( -- problem #31 btrim( -- problem #32 follow.fol_b_animid)) AS fol_b_animid , follow.fol_date - , CASE - WHEN BTRIM(follow.fol_am_observer_1) IS NOT DISTINCT FROM '' - THEN NULL - ELSE - BTRIM(follow.fol_am_observer_1) - END AS am_obs1 - , CASE - WHEN BTRIM(follow.fol_am_observer_2) IS NOT DISTINCT FROM '' - THEN NULL - ELSE - BTRIM(follow.fol_am_observer_2) - END AS am_obs2 - , CASE - WHEN BTRIM(follow.fol_pm_observer_1) IS NOT DISTINCT FROM '' - THEN NULL - ELSE - BTRIM(follow.fol_pm_observer_1) - END AS pm_obs1 - , CASE - WHEN BTRIM(follow.fol_pm_observer_2) IS NOT DISTINCT FROM '' - THEN NULL - ELSE - BTRIM(follow.fol_pm_observer_2) - END AS pm_obs2 + , follow.fol_am_observer_1 AS am_obs1 + , follow.fol_am_observer_2 AS am_obs2 + , follow.fol_pm_observer_1 AS pm_obs1 + , follow.fol_pm_observer_2 AS pm_obs2 FROM clean.follow ) SELECT -- 2.34.1