project data from postgresql
About 1 min
project data from postgresql
prepare
- prepare clickhouse service
- clickhouse with kubernetes
- clickhouse with container
- this article will use this as example
- prepare postgres service
- with kubernetes
- with container
- this article will use this as example
- download data from https://www.nyc.gov/site/tlc/about/tlc-trip-record-data.page
- taxi+zone_lookup.csv
curl -LO https://s3.amazonaws.com/nyc-tlc/misc/taxi+_zone_lookup.csv
- prepare table and data in postgresql
- create database
tlc
podman run --rm \ --env PGPASSWORD=postgresql \ --entrypoint psql \ -it docker.io/library/postgres:15.2-alpine3.17 \ --host host.containers.internal \ --port 5432 \ --username postgres \ --dbname postgres \ --command "CREATE DATABASE tlc"
- create table
taxi_zone_lookup
podman run --rm \ --env PGPASSWORD=postgresql \ --entrypoint psql \ -it docker.io/library/postgres:15.2-alpine3.17 \ --host host.containers.internal \ --port 5432 \ --username postgres \ --dbname tlc \ --command "CREATE TABLE IF NOT EXISTS taxi_zone_lookup ( \"LocationID\" INTEGER, \"Borough\" VARCHAR(255), \"Zone\" VARCHAR(255), \"service_zone\" VARCHAR(255), PRIMARY KEY (\"LocationID\") )"
- upload csv data to table
taxi_zone_lookup
podman run --rm \ -v $(pwd)/taxi+_zone_lookup.csv:/data/taxi+_zone_lookup.csv \ --env PGPASSWORD=postgresql \ --entrypoint psql \ -it docker.io/library/postgres:15.2-alpine3.17 \ --host host.containers.internal \ --port 5432 \ --username postgres \ --dbname tlc \ --command "\\COPY taxi_zone_lookup FROM '/data/taxi+_zone_lookup.csv' WITH CSV HEADER DELIMITER ','"
- query data from
taxi_zone_lookup
podman run --rm \ -v $(pwd)/taxi+_zone_lookup.csv:/data/taxi+_zone_lookup.csv \ --env PGPASSWORD=postgresql \ --entrypoint psql \ -it docker.io/library/postgres:15.2-alpine3.17 \ --host host.containers.internal \ --port 5432 \ --username postgres \ --dbname tlc \ --command "SELECT * FROM taxi_zone_lookup LIMIT 10"
- create database
interact with clickhouse
- create table with PostgreSQL engine
podman run --rm \ --entrypoint clickhouse-client \ -it docker.io/clickhouse/clickhouse-server:23.11.5.29-alpine \ --host host.containers.internal \ --port 19000 \ --user ben \ --password 123456 \ --query "CREATE DATABASE IF NOT EXISTS postgresql_tlc ENGINE = PostgreSQL('host.containers.internal:5432', 'postgres', 'postgres', 'postgresql', 'public', 0) COMMENT 'project data from database named tlc in postgresql'"
- query data
podman run --rm \ --entrypoint clickhouse-client \ -it docker.io/clickhouse/clickhouse-server:23.11.5.29-alpine \ --host host.containers.internal \ --port 19000 \ --user ben \ --password 123456 \ --query "SELECT * FROM postgresql_tlc.taxi_zone_lookup LIMIT 10"