From fe8b29797c4db3f98763e59070ff19699267740e Mon Sep 17 00:00:00 2001 From: "Karl O. Pinc" Date: Thu, 9 Nov 2023 10:20:16 -0600 Subject: [PATCH] System to load MS Access dump into Postgres --- README | 3 +- conversion/Makefile | 116 +++++++++++++++++++++++++++++++ conversion/README | 60 ++++++++++++++++ conversion/createschemas.sql | 20 ++++++ conversion/grant_permissions.sql | 62 +++++++++++++++++ conversion/mung_dump.sh | 69 ++++++++++++++++++ 6 files changed, 329 insertions(+), 1 deletion(-) create mode 100644 conversion/Makefile create mode 100644 conversion/README create mode 100644 conversion/createschemas.sql create mode 100644 conversion/grant_permissions.sql create mode 100755 conversion/mung_dump.sh diff --git a/README b/README index 5d53daf..eef1d32 100644 --- a/README +++ b/README @@ -2,7 +2,8 @@ Type "make" in this directory for help on the build system. Note that while some subdirectories have their own Makefile, all make invocations should be from this directory. All dependencies are not -guarenteed to be examined otherwise. +guarenteed to be examined otherwise. The exception to this is +conversion/Makefile. It is invoked directly. Notes on the build system: diff --git a/conversion/Makefile b/conversion/Makefile new file mode 100644 index 0000000..d977e9e --- /dev/null +++ b/conversion/Makefile @@ -0,0 +1,116 @@ +# Copyright (C) 2023 The Meme Factory, Inc. www.karlpinc.com +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +# Bugs: +# We don't try super-hard to be super-clean with the code, since the +# conversion code need not be maintained long-term. +# +# Karl O. Pinc + +# +# Variables +# +MAKE_FILES := ../make_files +export M4_GLOBAL_INCLUDE_PATH := ../include + +# Those files that might change the rebuild results +# (At this point we don't distinguish between building docs and database.) +GENERIC_DEPENDS := Makefile \ + $(wildcard $(MAKE_FILES)/*.mk) \ + $(wildcard $(M4_GLOBAL_INCLUDE_PATH)/*.m4) + +# Make "help" be the default target +include $(MAKE_FILES)/help.mk + +# Defaults for variable values +# Have a look as you'll probably want to set some of them. +include $(MAKE_FILES)/defaults.mk +# Adjust paths in defaults.mk +PSQL_SETUP_ORIG := PSQL_SETUP +PSQL_DEPENDS := ../$(PSQL_DEPENDS) + +# Be sure that our dependences are met. +PSQL_SETUP: $(GENERIC_DEPENDS) + $(MAKE) -C .. $(PSQL_SETUP_ORIG) + +## ######################################################################## +## CONVERSION TARGETS +## +## For further help, examples, and information on how to use this file, see: +## https://sokwe.janegoodall.org/wiki/BuildSystem +## +## Variables controlling conversion: +## +## DATA_FILE +## The bzip2 compressed dump of the SQL that restores the original +## MS Access database structure and data content into PostgreSQL. +## +## CONVERSION_SCHEMA +## The schema into which the dump of the original MS Access +## database is restored. The default is: conversion + +# +# The conversion portion of the build system +# + +CONVERSION_SCHEMA := conversion +PSQL_SETUP := ../$(PSQL_SETUP) \ + printf 'SET search_path TO $(CONVERSION_SCHEMA);\n' ; + +## +## The available targets for make (make TARGET) are: +## + +## convert Load the DATA_FILE MS Access dump into the +## CONVERSION_SCHEMA and convert the data, storing +## the converted data in the TARGET_DB. +.PHONY: convert +convert: create-schema restore grant_permissions + +## clean Delete all user-generated files +.PHONY: clean +clean: + +## +## Lesser used targets: +## + +## create-schema Create the CONVERSION_SCHEMA +create-schema: $(PSQL_DEPENDS) + ( $(PSQL_SETUP) \ + cat createschemas.sql ; ) \ + | psql $(PSQL_ARGS) $(PSQL_SINGLE_TRANS) --set=ON_ERROR_STOP=y + +## restore Load the DATA_FILE MS Access dump into the +## CONVERSION_SCHEMA. The DATA_FILE is expected +## to drop all tables (and other db objects) before +## creating them, so this can be re-run multiple +## times. +.PHONY: restore +restore: $(PSQL_DEPENDS) + [ -n "$(DATA_FILE)" ] \ + || { printf 'The DATA_FILE variable must be set\n' >&2 ; \ + exit 1 ; } + ( $(PSQL_SETUP) \ + ./mung_dump.sh $(DATA_FILE) ; ) \ + | psql $(PSQL_ARGS) +# | psql $(PSQL_ARGS) $(PSQL_SINGLE_TRANS) --set=ON_ERROR_STOP=y + +## grant_permissions Give readers and writers read permission to all tables. +.PHONY: grant_permissions +grant_permissions: $(PSQL_DEPENDS) grant_permissions.sql + ( $(PSQL_SETUP) \ + cat grant_permissions.sql ; ) \ + | psql $(PSQL_ARGS) $(PSQL_SINGLE_TRANS) --set=ON_ERROR_STOP=y diff --git a/conversion/README b/conversion/README new file mode 100644 index 0000000..01e557b --- /dev/null +++ b/conversion/README @@ -0,0 +1,60 @@ + +| psql --tuples-only -q -U kop_admin -d sokwedb_dev -h /var/run/postgresql +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-01-21 00:00:00, HM, 1899-12-30 10:00:00) already exists. +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-01-21 00:00:00, HM, 1899-12-30 10:15:00) already exists. +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-01-21 00:00:00, HM, 1899-12-30 10:45:00) already exists. +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-01-21 00:00:00, HM, 1899-12-30 11:00:00) already exists. +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-01-21 00:00:00, HM, 1899-12-30 11:30:00) already exists. +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-01-21 00:00:00, HM, 1899-12-30 11:45:00) already exists. +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-01-21 00:00:00, HM, 1899-12-30 12:15:00) already exists. +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-01-21 00:00:00, HM, 1899-12-30 12:30:00) already exists. +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-01-21 00:00:00, HM, 1899-12-30 13:00:00) already exists. +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-01-21 00:00:00, HM, 1899-12-30 13:15:00) already exists. +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-01-21 00:00:00, HM, 1899-12-30 13:45:00) already exists. +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-01-21 00:00:00, HM, 1899-12-30 14:00:00) already exists. +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-01-21 00:00:00, HM, 1899-12-30 14:30:00) already exists. +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-01-21 00:00:00, HM, 1899-12-30 14:45:00) already exists. +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-01-21 00:00:00, HM, 1899-12-30 15:15:00) already exists. +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-01-21 00:00:00, HM, 1899-12-30 15:30:00) already exists. +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-01-21 00:00:00, HM, 1899-12-30 16:00:00) already exists. +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-01-21 00:00:00, HM, 1899-12-30 16:15:00) already exists. +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-01-21 00:00:00, HM, 1899-12-30 16:45:00) already exists. +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-01-21 00:00:00, HM, 1899-12-30 17:00:00) already exists. +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-01-21 00:00:00, HM, 1899-12-30 17:30:00) already exists. +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-01-21 00:00:00, HM, 1899-12-30 17:45:00) already exists. +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-01-21 00:00:00, HM, 1899-12-30 18:15:00) already exists. +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-01-21 00:00:00, HM, 1899-12-30 18:30:00) already exists. +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-01-21 00:00:00, HM, 1899-12-30 19:00:00) already exists. +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-02-25 00:00:00, EV, 1899-12-30 12:15:00) already exists. +ERROR: duplicate key value violates unique constraint "FOLLOW_MAP_TIME_pkey" +DETAIL: Key ("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time")=(1977-03-04 00:00:00, EV, 1899-12-30 09:15:00) already exists. + + +Wed 08 Nov 2023 02:43:10 AM CST + diff --git a/conversion/createschemas.sql b/conversion/createschemas.sql new file mode 100644 index 0000000..591142c --- /dev/null +++ b/conversion/createschemas.sql @@ -0,0 +1,20 @@ +-- Copyright (C) 2023 The Meme Factory, Inc. http://www.karlpinc.com/ +-- +-- This program is free software: you can redistribute it and/or modify +-- it under the terms of the GNU Affero General Public License as +-- published by the Free Software Foundation, either version 3 of the +-- License, or (at your option) any later version. +-- +-- This program is distributed in the hope that it will be useful, +-- but WITHOUT ANY WARRANTY; without even the implied warranty of +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-- GNU Affero General Public License for more details. +-- +-- You should have received a copy of the GNU Affero General Public License +-- along with this program. If not, see . +-- +-- Karl O. Pinc + + +CREATE SCHEMA IF NOT EXISTS conversion; +GRANT USAGE ON SCHEMA upload TO reader, writer; diff --git a/conversion/grant_permissions.sql b/conversion/grant_permissions.sql new file mode 100644 index 0000000..765f2f9 --- /dev/null +++ b/conversion/grant_permissions.sql @@ -0,0 +1,62 @@ +-- Copyright (C) 2023 The Meme Factory, Inc. http://www.karlpinc.com/ +-- +-- This program is free software: you can redistribute it and/or modify +-- it under the terms of the GNU Affero General Public License as +-- published by the Free Software Foundation, either version 3 of the +-- License, or (at your option) any later version. +-- +-- This program is distributed in the hope that it will be useful, +-- but WITHOUT ANY WARRANTY; without even the implied warranty of +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-- GNU Affero General Public License for more details. +-- +-- You should have received a copy of the GNU Affero General Public License +-- along with this program. If not, see . +-- +-- Karl O. Pinc + + +GRANT SELECT ON "ADJ_TIME" TO reader, writer; +GRANT SELECT ON "AGGRESSION_EVENT" TO reader, writer; +GRANT SELECT ON "AGGRESSION_EVENT_LOG" TO reader, writer; +GRANT SELECT ON "ATTENDANCE" TO reader, writer; +GRANT SELECT ON "BIOGRAPHY" TO reader, writer; +GRANT SELECT ON "BIOGRAPHY_UPDATE_LOG" TO reader, writer; +GRANT SELECT ON "BRECORD_NOTES" TO reader, writer; +GRANT SELECT ON "CHIMPS_ON_TIKIS_LOOKUP" TO reader, writer; +GRANT SELECT ON "COLOBUS" TO reader, writer; +GRANT SELECT ON "COMMUNITY_LOOKUP" TO reader, writer; +GRANT SELECT ON "COMMUNITY_MEMBERSHIP" TO reader, writer; +GRANT SELECT ON "COMMUNITY_MEMBERSHIP_UPDATE_LOG" TO reader, writer; +GRANT SELECT ON "ELO RANKS DAILY - FEMALES - old" TO reader, writer; +GRANT SELECT ON "ELO RANKS DAILY - KK FEMALES" TO reader, writer; +GRANT SELECT ON "ELO RANKS DAILY - KK MALES" TO reader, writer; +GRANT SELECT ON "ELO RANKS DAILY - MALES - old" TO reader, writer; +GRANT SELECT ON "ELO RANKS DAILY - MT FEMALES" TO reader, writer; +GRANT SELECT ON "FEMALE REPRODUCTIVE STATES" TO reader, writer; +GRANT SELECT ON "FEMALE_REPRO_HISTORY" TO reader, writer; +GRANT SELECT ON "FERTILITY" TO reader, writer; +GRANT SELECT ON "FOLLOW" TO reader, writer; +GRANT SELECT ON "FOLLOW_ARRIVAL" TO reader, writer; +GRANT SELECT ON "FOLLOW_MAP_LOCATION" TO reader, writer; +GRANT SELECT ON "FOLLOW_MAP_POSITION" TO reader, writer; +GRANT SELECT ON "FOLLOW_MAP_TIME" TO reader, writer; +GRANT SELECT ON "FOLLOW_MAP_TIME_PROBLEMS" TO reader, writer; +GRANT SELECT ON "FOOD_BOUT" TO reader, writer; +GRANT SELECT ON "FOOD_LOOKUP" TO reader, writer; +GRANT SELECT ON "FOOD_PART_LOOKUP" TO reader, writer; +GRANT SELECT ON "FOOD_VARIATIONS_LOOKUP" TO reader, writer; +GRANT SELECT ON "GROOM_BOUT" TO reader, writer; +GRANT SELECT ON "GROOM_SCAN_AREC" TO reader, writer; +GRANT SELECT ON "GROOM_SCANS" TO reader, writer; +GRANT SELECT ON "GROUP_COMP_INPUT" TO reader, writer; +GRANT SELECT ON "MATING_EVENT" TO reader, writer; +GRANT SELECT ON "OTHER_SPECIES" TO reader, writer; +GRANT SELECT ON "OTHER_SPECIES_LOOKUP" TO reader, writer; +GRANT SELECT ON "PANTGRUNT_EVENT" TO reader, writer; +GRANT SELECT ON "PANTGRUNT_EVENT_LOG" TO reader, writer; +GRANT SELECT ON "SIGHTINGS" TO reader, writer; +GRANT SELECT ON "SIV_STATUS_BOUT" TO reader, writer; +GRANT SELECT ON "SUBADULT_ARRIVALS_LOG" TO reader, writer; +GRANT SELECT ON "tbl_adjusted ages for alone arrivals" TO reader, writer; +GRANT SELECT ON "tlk_brecord_notes_codes" TO reader, writer; diff --git a/conversion/mung_dump.sh b/conversion/mung_dump.sh new file mode 100755 index 0000000..2d57d5d --- /dev/null +++ b/conversion/mung_dump.sh @@ -0,0 +1,69 @@ +#!/bin/sh +# Copyright (C) 2023 The Meme Factory, Inc. http://www.karlpinc.com/ +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +# Ouput SQL on stdout of a modifed dump, applying settings to speed +# up the restore and altering the dump so that it will load. +# +# Syntax: mung_dump.sh DATA_FILE +# +# DATA_FILE The file containing the bzip2 compressed data dump +# +# +# Karl O. Pinc +# +# Note: The DATA_FILE seems to use crlf (MS Windows format) for EOL. +# + +DATA_FILE="$1" + +# Settings to speed the restore. +printf 'SET default_transaction_isolation TO "read uncommitted";\n' +printf 'SET synchronous_commit TO "off";\n' + +# Output the munged data dump as SQL +bunzip2 -c "${DATA_FILE}" \ + | awk ' + # Get rid of the CREATE DATABASE statement + /^CREATE DATABASE / { printed = 1; } + + # Get rid of the (2) SELECT statements that set sequences + /^SELECT / { printed = 1; } + + # Change all VARCHAR(255) declarations to TEXT + / VARCHAR\(255\)/ { print gensub(/ VARCHAR\(255\)/, " TEXT", 1); \ + printed = 1; } + + # Frob the CREATE TABLE for FOLLOW_MAP_TIME. + # Get rid of the comma on the line before the PRIMARY KEY declaration + / "FMT_update" TIMESTAMP,/ { print gensub(/,/, "", 1); \ + printed = 1; } + # Get rid of the PRIMARY KEY declaration, because the "key" is not + # unique. + / PRIMARY KEY \("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time"\)/\ + { printed = 1; } + + # Everything else is printed + {if (printed) { + printed = 0 ; } + else { + print ; } + }' \ + /dev/stdin + +# Create a non-unique index for the primary key index we removed. +printf '%s%s\n' \ + 'CREATE INDEX "FOLLOW_MAP_TIME_index1" ON "FOLLOW_MAP_TIME"' \ + '("FMT_FOL_date", "FMT_FOL_B_focal_AnimID", "FMT_time");' -- 2.34.1