Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save rdbhandari/02d7ab9db305ed06d65c0d10c327287b to your computer and use it in GitHub Desktop.

Select an option

Save rdbhandari/02d7ab9db305ed06d65c0d10c327287b to your computer and use it in GitHub Desktop.
Import IMDB TSV Database Into PostgreSQL
Import IMDB TSV Database Into PostgreSQL
-- CREATE TABLE STRUCTURE
CREATE TABLE name_basics (
nconst VARCHAR PRIMARY KEY,
primaryName VARCHAR,
birthYear INTEGER,
deathYear INTEGER,
primaryProfession VARCHAR,
knownForTitles VARCHAR
);
COMMENT ON COLUMN name_basics.nconst IS 'Unique identifier of the name/person';
COMMENT ON COLUMN name_basics.primaryName IS 'Name by which the person is most often credited';
COMMENT ON COLUMN name_basics.birthYear IS 'Birth year in YYYY format';
COMMENT ON COLUMN name_basics.deathYear IS 'Death year in YYYY format if applicable';
COMMENT ON COLUMN name_basics.primaryProfession IS 'Top-3 professions of the person';
COMMENT ON COLUMN name_basics.knownForTitles IS 'Titles the person is known for';
CREATE TABLE title_basics (
tconst VARCHAR PRIMARY KEY,
titleType VARCHAR,
primaryTitle VARCHAR,
originalTitle VARCHAR,
isAdult BOOLEAN,
startYear INTEGER,
endYear INTEGER,
runtimeMinutes INTEGER,
genres VARCHAR
);
COMMENT ON COLUMN title_basics.tconst IS 'Unique identifier of the title';
COMMENT ON COLUMN title_basics.titleType IS 'Type/format of the title (e.g. movie, short, tvseries, etc.)';
COMMENT ON COLUMN title_basics.primaryTitle IS 'The more popular title / title used by the filmmakers';
COMMENT ON COLUMN title_basics.originalTitle IS 'Original title in the original language';
COMMENT ON COLUMN title_basics.isAdult IS '0: non-adult title; 1: adult title';
COMMENT ON COLUMN title_basics.startYear IS 'Release year of the title; series start year for TV Series';
COMMENT ON COLUMN title_basics.endYear IS 'TV Series end year; \N for all other title types';
COMMENT ON COLUMN title_basics.runtimeMinutes IS 'Primary runtime of the title in minutes';
COMMENT ON COLUMN title_basics.genres IS 'Up to three genres associated with the title';
CREATE TABLE title_akas (
titleId VARCHAR PRIMARY KEY,
ordering INTEGER,
title VARCHAR,
region VARCHAR,
language VARCHAR,
types VARCHAR,
attributes VARCHAR,
isOriginalTitle BOOLEAN
);
COMMENT ON COLUMN title_akas.titleId IS 'Unique identifier of the title';
COMMENT ON COLUMN title_akas.ordering IS 'Number to uniquely identify rows for a given titleId';
COMMENT ON COLUMN title_akas.title IS 'Localized title';
COMMENT ON COLUMN title_akas.region IS 'Region for this version of the title';
COMMENT ON COLUMN title_akas.language IS 'Language of the title';
COMMENT ON COLUMN title_akas.types IS 'Enumerated set of attributes for this alternative title';
COMMENT ON COLUMN title_akas.attributes IS 'Additional terms to describe this alternative title';
COMMENT ON COLUMN title_akas.isOriginalTitle IS '0: not original title; 1: original title';
CREATE TABLE title_crew (
tconst VARCHAR PRIMARY KEY,
directors VARCHAR,
writers VARCHAR,
FOREIGN KEY (tconst) REFERENCES title_basics(tconst) ON DELETE CASCADE
);
COMMENT ON COLUMN title_crew.tconst IS 'Unique identifier of the title';
COMMENT ON COLUMN title_crew.directors IS 'Director(s) of the title';
COMMENT ON COLUMN title_crew.writers IS 'Writer(s) of the title';
CREATE TABLE title_episode (
tconst VARCHAR PRIMARY KEY,
parentTconst VARCHAR,
seasonNumber INTEGER,
episodeNumber INTEGER,
FOREIGN KEY (tconst) REFERENCES title_basics(tconst) ON DELETE CASCADE,
FOREIGN KEY (parentTconst) REFERENCES title_basics(tconst) ON DELETE CASCADE
);
COMMENT ON COLUMN title_episode.tconst IS 'Unique identifier of the episode';
COMMENT ON COLUMN title_episode.parentTconst IS 'Unique identifier of the parent TV Series';
COMMENT ON COLUMN title_episode.seasonNumber IS 'Season number the episode belongs to';
COMMENT ON COLUMN title_episode.episodeNumber IS 'Episode number in the TV series';
CREATE TABLE title_principals (
tconst VARCHAR,
ordering INTEGER,
nconst VARCHAR,
category VARCHAR,
job VARCHAR,
characters VARCHAR,
PRIMARY KEY (tconst, ordering, nconst),
FOREIGN KEY (tconst) REFERENCES title_basics(tconst) ON DELETE CASCADE,
FOREIGN KEY (nconst) REFERENCES name_basics(nconst) ON DELETE CASCADE
);
COMMENT ON COLUMN title_principals.tconst IS 'Unique identifier of the title';
COMMENT ON COLUMN title_principals.ordering IS 'Number to uniquely identify rows for a given titleId';
COMMENT ON COLUMN title_principals.nconst IS 'Unique identifier of the name/person';
COMMENT ON COLUMN title_principals.category IS 'Category of job that person was in';
COMMENT ON COLUMN title_principals.job IS 'Specific job title if applicable';
COMMENT ON COLUMN title_principals.characters IS 'Name of the character played if applicable';
CREATE TABLE title_ratings (
tconst VARCHAR PRIMARY KEY,
averageRating NUMERIC,
numVotes INTEGER,
FOREIGN KEY (tconst) REFERENCES title_basics(tconst) ON DELETE CASCADE
);
COMMENT ON COLUMN title_ratings.tconst IS 'Unique identifier of the title';
COMMENT ON COLUMN title_ratings.averageRating IS 'Weighted average of all the individual user ratings';
COMMENT ON COLUMN title_ratings.numVotes IS 'Number of votes the title has received';
-- COPY DATA TO TABLE (Execute Line By Line)
COPY name_basics FROM 'E:\IMDB SETUP\IMDB\name.basics.tsv' DELIMITER E'\t' NULL '\N' QUOTE E'\b' CSV HEADER;
COPY title_akas FROM 'E:\IMDB SETUP\IMDB\title.akas.tsv' DELIMITER E'\t' NULL '\N' QUOTE E'\b' CSV HEADER;
COPY title_basics FROM 'E:\IMDB SETUP\IMDB\title.basics.tsv' DELIMITER E'\t' NULL '\N' QUOTE E'\b' CSV HEADER;
COPY title_crew FROM 'E:\IMDB SETUP\IMDB\title.crew.tsv' DELIMITER E'\t' NULL '\N' QUOTE E'\b' CSV HEADER;
COPY title_episode FROM 'E:\IMDB SETUP\IMDB\title.episode.tsv' DELIMITER E'\t' NULL '\N' QUOTE E'\b' CSV HEADER;
COPY title_principals FROM 'E:\IMDB SETUP\IMDB\title.principals.tsv' DELIMITER E'\t' NULL '\N' QUOTE E'\b' CSV HEADER;
COPY title_ratings FROM 'E:\IMDB SETUP\IMDB\title.ratings.tsv' DELIMITER E'\t' NULL '\N' QUOTE E'\b' CSV HEADER;
-- UPDATE TABLE STRUCTURE (Execute Line By Line. These Commands will update varchar to varchar array)
-- title_akas
UPDATE public.title_akas set types = '{' || types || '}';
ALTER TABLE public.title_akas ALTER COLUMN types TYPE _varchar USING types::_varchar;
UPDATE public.title_akas set attributes = '{' || attributes || '}';
ALTER TABLE public.title_akas ALTER COLUMN attributes TYPE _varchar USING attributes::_varchar;
-- title_basics
UPDATE public.title_basics set genres = '{' || genres || '}';
ALTER TABLE public.title_basics ALTER COLUMN genres TYPE _varchar USING genres::_varchar;
-- title_crew
UPDATE public.title_crew set directors = '{' || directors || '}';
ALTER TABLE public.title_crew ALTER COLUMN directors TYPE _varchar USING directors::_varchar;
UPDATE public.title_crew set writers = '{' || writers || '}';
ALTER TABLE public.title_crew ALTER COLUMN writers TYPE _varchar USING writers::_varchar;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment