Created
August 18, 2024 03:07
-
-
Save rdbhandari/02d7ab9db305ed06d65c0d10c327287b to your computer and use it in GitHub Desktop.
Import IMDB TSV Database Into PostgreSQL
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Import IMDB TSV Database Into PostgreSQL | |
| -- CREATE TABLE STRUCTURE | |
| CREATE TABLE name_basics ( | |
| nconst VARCHAR PRIMARY KEY, | |
| primaryName VARCHAR, | |
| birthYear INTEGER, | |
| deathYear INTEGER, | |
| primaryProfession VARCHAR, | |
| knownForTitles VARCHAR | |
| ); | |
| COMMENT ON COLUMN name_basics.nconst IS 'Unique identifier of the name/person'; | |
| COMMENT ON COLUMN name_basics.primaryName IS 'Name by which the person is most often credited'; | |
| COMMENT ON COLUMN name_basics.birthYear IS 'Birth year in YYYY format'; | |
| COMMENT ON COLUMN name_basics.deathYear IS 'Death year in YYYY format if applicable'; | |
| COMMENT ON COLUMN name_basics.primaryProfession IS 'Top-3 professions of the person'; | |
| COMMENT ON COLUMN name_basics.knownForTitles IS 'Titles the person is known for'; | |
| CREATE TABLE title_basics ( | |
| tconst VARCHAR PRIMARY KEY, | |
| titleType VARCHAR, | |
| primaryTitle VARCHAR, | |
| originalTitle VARCHAR, | |
| isAdult BOOLEAN, | |
| startYear INTEGER, | |
| endYear INTEGER, | |
| runtimeMinutes INTEGER, | |
| genres VARCHAR | |
| ); | |
| COMMENT ON COLUMN title_basics.tconst IS 'Unique identifier of the title'; | |
| COMMENT ON COLUMN title_basics.titleType IS 'Type/format of the title (e.g. movie, short, tvseries, etc.)'; | |
| COMMENT ON COLUMN title_basics.primaryTitle IS 'The more popular title / title used by the filmmakers'; | |
| COMMENT ON COLUMN title_basics.originalTitle IS 'Original title in the original language'; | |
| COMMENT ON COLUMN title_basics.isAdult IS '0: non-adult title; 1: adult title'; | |
| COMMENT ON COLUMN title_basics.startYear IS 'Release year of the title; series start year for TV Series'; | |
| COMMENT ON COLUMN title_basics.endYear IS 'TV Series end year; \N for all other title types'; | |
| COMMENT ON COLUMN title_basics.runtimeMinutes IS 'Primary runtime of the title in minutes'; | |
| COMMENT ON COLUMN title_basics.genres IS 'Up to three genres associated with the title'; | |
| CREATE TABLE title_akas ( | |
| titleId VARCHAR PRIMARY KEY, | |
| ordering INTEGER, | |
| title VARCHAR, | |
| region VARCHAR, | |
| language VARCHAR, | |
| types VARCHAR, | |
| attributes VARCHAR, | |
| isOriginalTitle BOOLEAN | |
| ); | |
| COMMENT ON COLUMN title_akas.titleId IS 'Unique identifier of the title'; | |
| COMMENT ON COLUMN title_akas.ordering IS 'Number to uniquely identify rows for a given titleId'; | |
| COMMENT ON COLUMN title_akas.title IS 'Localized title'; | |
| COMMENT ON COLUMN title_akas.region IS 'Region for this version of the title'; | |
| COMMENT ON COLUMN title_akas.language IS 'Language of the title'; | |
| COMMENT ON COLUMN title_akas.types IS 'Enumerated set of attributes for this alternative title'; | |
| COMMENT ON COLUMN title_akas.attributes IS 'Additional terms to describe this alternative title'; | |
| COMMENT ON COLUMN title_akas.isOriginalTitle IS '0: not original title; 1: original title'; | |
| CREATE TABLE title_crew ( | |
| tconst VARCHAR PRIMARY KEY, | |
| directors VARCHAR, | |
| writers VARCHAR, | |
| FOREIGN KEY (tconst) REFERENCES title_basics(tconst) ON DELETE CASCADE | |
| ); | |
| COMMENT ON COLUMN title_crew.tconst IS 'Unique identifier of the title'; | |
| COMMENT ON COLUMN title_crew.directors IS 'Director(s) of the title'; | |
| COMMENT ON COLUMN title_crew.writers IS 'Writer(s) of the title'; | |
| CREATE TABLE title_episode ( | |
| tconst VARCHAR PRIMARY KEY, | |
| parentTconst VARCHAR, | |
| seasonNumber INTEGER, | |
| episodeNumber INTEGER, | |
| FOREIGN KEY (tconst) REFERENCES title_basics(tconst) ON DELETE CASCADE, | |
| FOREIGN KEY (parentTconst) REFERENCES title_basics(tconst) ON DELETE CASCADE | |
| ); | |
| COMMENT ON COLUMN title_episode.tconst IS 'Unique identifier of the episode'; | |
| COMMENT ON COLUMN title_episode.parentTconst IS 'Unique identifier of the parent TV Series'; | |
| COMMENT ON COLUMN title_episode.seasonNumber IS 'Season number the episode belongs to'; | |
| COMMENT ON COLUMN title_episode.episodeNumber IS 'Episode number in the TV series'; | |
| CREATE TABLE title_principals ( | |
| tconst VARCHAR, | |
| ordering INTEGER, | |
| nconst VARCHAR, | |
| category VARCHAR, | |
| job VARCHAR, | |
| characters VARCHAR, | |
| PRIMARY KEY (tconst, ordering, nconst), | |
| FOREIGN KEY (tconst) REFERENCES title_basics(tconst) ON DELETE CASCADE, | |
| FOREIGN KEY (nconst) REFERENCES name_basics(nconst) ON DELETE CASCADE | |
| ); | |
| COMMENT ON COLUMN title_principals.tconst IS 'Unique identifier of the title'; | |
| COMMENT ON COLUMN title_principals.ordering IS 'Number to uniquely identify rows for a given titleId'; | |
| COMMENT ON COLUMN title_principals.nconst IS 'Unique identifier of the name/person'; | |
| COMMENT ON COLUMN title_principals.category IS 'Category of job that person was in'; | |
| COMMENT ON COLUMN title_principals.job IS 'Specific job title if applicable'; | |
| COMMENT ON COLUMN title_principals.characters IS 'Name of the character played if applicable'; | |
| CREATE TABLE title_ratings ( | |
| tconst VARCHAR PRIMARY KEY, | |
| averageRating NUMERIC, | |
| numVotes INTEGER, | |
| FOREIGN KEY (tconst) REFERENCES title_basics(tconst) ON DELETE CASCADE | |
| ); | |
| COMMENT ON COLUMN title_ratings.tconst IS 'Unique identifier of the title'; | |
| COMMENT ON COLUMN title_ratings.averageRating IS 'Weighted average of all the individual user ratings'; | |
| COMMENT ON COLUMN title_ratings.numVotes IS 'Number of votes the title has received'; | |
| -- COPY DATA TO TABLE (Execute Line By Line) | |
| COPY name_basics FROM 'E:\IMDB SETUP\IMDB\name.basics.tsv' DELIMITER E'\t' NULL '\N' QUOTE E'\b' CSV HEADER; | |
| COPY title_akas FROM 'E:\IMDB SETUP\IMDB\title.akas.tsv' DELIMITER E'\t' NULL '\N' QUOTE E'\b' CSV HEADER; | |
| COPY title_basics FROM 'E:\IMDB SETUP\IMDB\title.basics.tsv' DELIMITER E'\t' NULL '\N' QUOTE E'\b' CSV HEADER; | |
| COPY title_crew FROM 'E:\IMDB SETUP\IMDB\title.crew.tsv' DELIMITER E'\t' NULL '\N' QUOTE E'\b' CSV HEADER; | |
| COPY title_episode FROM 'E:\IMDB SETUP\IMDB\title.episode.tsv' DELIMITER E'\t' NULL '\N' QUOTE E'\b' CSV HEADER; | |
| COPY title_principals FROM 'E:\IMDB SETUP\IMDB\title.principals.tsv' DELIMITER E'\t' NULL '\N' QUOTE E'\b' CSV HEADER; | |
| COPY title_ratings FROM 'E:\IMDB SETUP\IMDB\title.ratings.tsv' DELIMITER E'\t' NULL '\N' QUOTE E'\b' CSV HEADER; | |
| -- UPDATE TABLE STRUCTURE (Execute Line By Line. These Commands will update varchar to varchar array) | |
| -- title_akas | |
| UPDATE public.title_akas set types = '{' || types || '}'; | |
| ALTER TABLE public.title_akas ALTER COLUMN types TYPE _varchar USING types::_varchar; | |
| UPDATE public.title_akas set attributes = '{' || attributes || '}'; | |
| ALTER TABLE public.title_akas ALTER COLUMN attributes TYPE _varchar USING attributes::_varchar; | |
| -- title_basics | |
| UPDATE public.title_basics set genres = '{' || genres || '}'; | |
| ALTER TABLE public.title_basics ALTER COLUMN genres TYPE _varchar USING genres::_varchar; | |
| -- title_crew | |
| UPDATE public.title_crew set directors = '{' || directors || '}'; | |
| ALTER TABLE public.title_crew ALTER COLUMN directors TYPE _varchar USING directors::_varchar; | |
| UPDATE public.title_crew set writers = '{' || writers || '}'; | |
| ALTER TABLE public.title_crew ALTER COLUMN writers TYPE _varchar USING writers::_varchar; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment