Skip to main content

project data from postgresql

ben.wangzAbout 1 min

project data from postgresql

prepare

  1. prepare clickhouse service
  2. prepare postgres service
  3. download data from https://www.nyc.gov/site/tlc/about/tlc-trip-record-data.page
  4. prepare table and data in postgresql
    • create database tlc
      • podman run --rm \
            --env PGPASSWORD=postgresql \
            --entrypoint psql \
            -it docker.io/library/postgres:15.2-alpine3.17 \
                --host host.containers.internal \
                --port 5432 \
                --username postgres \
                --dbname postgres \
                --command "CREATE DATABASE tlc"
        
    • create table taxi_zone_lookup
      • podman run --rm \
            --env PGPASSWORD=postgresql \
            --entrypoint psql \
            -it docker.io/library/postgres:15.2-alpine3.17 \
                --host host.containers.internal \
                --port 5432 \
                --username postgres \
                --dbname tlc \
                --command "CREATE TABLE IF NOT EXISTS taxi_zone_lookup (
                    \"LocationID\" INTEGER,
                    \"Borough\" VARCHAR(255),
                    \"Zone\" VARCHAR(255),
                    \"service_zone\" VARCHAR(255),
                    PRIMARY KEY (\"LocationID\")
                )"
        
    • upload csv data to table taxi_zone_lookup
      • podman run --rm \
            -v $(pwd)/taxi+_zone_lookup.csv:/data/taxi+_zone_lookup.csv \
            --env PGPASSWORD=postgresql \
            --entrypoint psql \
            -it docker.io/library/postgres:15.2-alpine3.17 \
                --host host.containers.internal \
                --port 5432 \
                --username postgres \
                --dbname tlc \
                --command "\\COPY taxi_zone_lookup FROM '/data/taxi+_zone_lookup.csv' WITH CSV HEADER DELIMITER ','"
        
    • query data from taxi_zone_lookup
      • podman run --rm \
            -v $(pwd)/taxi+_zone_lookup.csv:/data/taxi+_zone_lookup.csv \
            --env PGPASSWORD=postgresql \
            --entrypoint psql \
            -it docker.io/library/postgres:15.2-alpine3.17 \
                --host host.containers.internal \
                --port 5432 \
                --username postgres \
                --dbname tlc \
                --command "SELECT * FROM taxi_zone_lookup LIMIT 10"
        

interact with clickhouse

  1. create table with PostgreSQL engine
    • podman run --rm \
          --entrypoint clickhouse-client \
          -it docker.io/clickhouse/clickhouse-server:23.11.5.29-alpine \
              --host host.containers.internal \
              --port 19000 \
              --user ben \
              --password 123456 \
              --query "CREATE DATABASE IF NOT EXISTS postgresql_tlc
                ENGINE = PostgreSQL('host.containers.internal:5432', 'postgres', 'postgres', 'postgresql', 'public', 0)
                COMMENT 'project data from database named tlc in postgresql'"
      
  2. query data
    • podman run --rm \
          --entrypoint clickhouse-client \
          -it docker.io/clickhouse/clickhouse-server:23.11.5.29-alpine \
              --host host.containers.internal \
              --port 19000 \
              --user ben \
              --password 123456 \
              --query "SELECT * FROM postgresql_tlc.taxi_zone_lookup LIMIT 10"