Created
October 4, 2022 22:31
-
-
Save jlaw9/723021269238c87057382f093d6c3fe6 to your computer and use it in GitHub Desktop.
Matminer example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "id": "a63c7c38-b694-4528-a7cd-baed73fd7c70", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "import numpy as np\n", | |
| "from pymatgen.core import Structure\n", | |
| "from matminer.featurizers.site import CrystalNNFingerprint\n", | |
| "from matminer.featurizers.structure import SiteStatsFingerprint" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "id": "d1a28c55-3a50-45c6-ab65-1515b5eb9681", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "1181\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>material_id</th>\n", | |
| " <th>formula</th>\n", | |
| " <th>nsites</th>\n", | |
| " <th>space_group</th>\n", | |
| " <th>volume</th>\n", | |
| " <th>structure</th>\n", | |
| " <th>elastic_anisotropy</th>\n", | |
| " <th>G_Reuss</th>\n", | |
| " <th>G_VRH</th>\n", | |
| " <th>G_Voigt</th>\n", | |
| " <th>K_Reuss</th>\n", | |
| " <th>K_VRH</th>\n", | |
| " <th>K_Voigt</th>\n", | |
| " <th>poisson_ratio</th>\n", | |
| " <th>compliance_tensor</th>\n", | |
| " <th>elastic_tensor</th>\n", | |
| " <th>elastic_tensor_original</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>mp-10003</td>\n", | |
| " <td>Nb4CoSi</td>\n", | |
| " <td>12</td>\n", | |
| " <td>124</td>\n", | |
| " <td>194.419802</td>\n", | |
| " <td>[[0.94814328 2.07280467 2.5112 ] Nb, [5.273...</td>\n", | |
| " <td>0.030688</td>\n", | |
| " <td>96.844535</td>\n", | |
| " <td>97.141604</td>\n", | |
| " <td>97.438674</td>\n", | |
| " <td>194.267623</td>\n", | |
| " <td>194.268884</td>\n", | |
| " <td>194.270146</td>\n", | |
| " <td>0.285701</td>\n", | |
| " <td>[[0.004385293093993, -0.0016070693558990002, -...</td>\n", | |
| " <td>[[311.33514638650246, 144.45092552856926, 126....</td>\n", | |
| " <td>[[311.33514638650246, 144.45092552856926, 126....</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>mp-10010</td>\n", | |
| " <td>Al(CoSi)2</td>\n", | |
| " <td>5</td>\n", | |
| " <td>164</td>\n", | |
| " <td>61.987320</td>\n", | |
| " <td>[[0. 0. 0.] Al, [1.96639263 1.13529553 0.75278...</td>\n", | |
| " <td>0.266910</td>\n", | |
| " <td>93.939650</td>\n", | |
| " <td>96.252006</td>\n", | |
| " <td>98.564362</td>\n", | |
| " <td>173.647763</td>\n", | |
| " <td>175.449907</td>\n", | |
| " <td>177.252050</td>\n", | |
| " <td>0.268105</td>\n", | |
| " <td>[[0.0037715428949660003, -0.000844229828709, -...</td>\n", | |
| " <td>[[306.93357350984974, 88.02634955100905, 105.6...</td>\n", | |
| " <td>[[306.93357350984974, 88.02634955100905, 105.6...</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " material_id formula nsites space_group volume \\\n", | |
| "0 mp-10003 Nb4CoSi 12 124 194.419802 \n", | |
| "1 mp-10010 Al(CoSi)2 5 164 61.987320 \n", | |
| "\n", | |
| " structure elastic_anisotropy \\\n", | |
| "0 [[0.94814328 2.07280467 2.5112 ] Nb, [5.273... 0.030688 \n", | |
| "1 [[0. 0. 0.] Al, [1.96639263 1.13529553 0.75278... 0.266910 \n", | |
| "\n", | |
| " G_Reuss G_VRH G_Voigt K_Reuss K_VRH K_Voigt \\\n", | |
| "0 96.844535 97.141604 97.438674 194.267623 194.268884 194.270146 \n", | |
| "1 93.939650 96.252006 98.564362 173.647763 175.449907 177.252050 \n", | |
| "\n", | |
| " poisson_ratio compliance_tensor \\\n", | |
| "0 0.285701 [[0.004385293093993, -0.0016070693558990002, -... \n", | |
| "1 0.268105 [[0.0037715428949660003, -0.000844229828709, -... \n", | |
| "\n", | |
| " elastic_tensor \\\n", | |
| "0 [[311.33514638650246, 144.45092552856926, 126.... \n", | |
| "1 [[306.93357350984974, 88.02634955100905, 105.6... \n", | |
| "\n", | |
| " elastic_tensor_original \n", | |
| "0 [[311.33514638650246, 144.45092552856926, 126.... \n", | |
| "1 [[306.93357350984974, 88.02634955100905, 105.6... " | |
| ] | |
| }, | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "from matminer.datasets.convenience_loaders import load_elastic_tensor\n", | |
| "\n", | |
| "df = load_elastic_tensor() # loads dataset in a pandas DataFrame object\n", | |
| "print(len(df))\n", | |
| "df.head(2)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "id": "86b5151c-5458-4718-b242-5a6d00f9f3a9", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Structure Summary\n", | |
| "Lattice\n", | |
| " abc : 6.22178 6.22178 5.0224\n", | |
| " angles : 90.0 90.0 90.0\n", | |
| " volume : 194.41984808065214\n", | |
| " A : 6.22178 0.0 0.0\n", | |
| " B : 0.0 6.22178 0.0\n", | |
| " C : 0.0 0.0 5.0224\n", | |
| "PeriodicSite: Nb (0.9481, 2.0728, 2.5112) [0.1524, 0.3332, 0.5000]\n", | |
| "PeriodicSite: Nb (5.2736, 4.1490, 2.5112) [0.8476, 0.6668, 0.5000]\n", | |
| "PeriodicSite: Nb (4.1490, 0.9481, 2.5112) [0.6668, 0.1524, 0.5000]\n", | |
| "PeriodicSite: Nb (2.0728, 5.2736, 2.5112) [0.3332, 0.8476, 0.5000]\n", | |
| "PeriodicSite: Nb (5.2736, 2.0728, 0.0000) [0.8476, 0.3332, 0.0000]\n", | |
| "PeriodicSite: Nb (4.1490, 5.2736, 0.0000) [0.6668, 0.8476, 0.0000]\n", | |
| "PeriodicSite: Nb (2.0728, 0.9481, 0.0000) [0.3332, 0.1524, 0.0000]\n", | |
| "PeriodicSite: Nb (0.9481, 4.1490, 0.0000) [0.1524, 0.6668, 0.0000]\n", | |
| "PeriodicSite: Co (0.0000, 0.0000, 3.7668) [0.0000, 0.0000, 0.7500]\n", | |
| "PeriodicSite: Co (0.0000, 0.0000, 1.2556) [0.0000, 0.0000, 0.2500]\n", | |
| "PeriodicSite: Si (3.1109, 3.1109, 3.7668) [0.5000, 0.5000, 0.7500]\n", | |
| "PeriodicSite: Si (3.1109, 3.1109, 1.2556) [0.5000, 0.5000, 0.2500]" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# first use an example structure\n", | |
| "structure = df.structure.values[0]\n", | |
| "structure" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "id": "f3381626-949b-4c05-829c-07c9147d0846", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "ssf = SiteStatsFingerprint(\n", | |
| " CrystalNNFingerprint.from_preset('ops', distance_cutoffs=None, x_diff_weight=0),\n", | |
| " stats=('mean', 'std_dev')\n", | |
| ")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "id": "ec9e7455-cf0f-4f4e-9ba6-998b8251ee84", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "122\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "features = ssf.featurize(structure)\n", | |
| "print(len(features))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "id": "7ba672ba-8990-4161-850a-3069b354d2f2", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "217aecf8f26e4df6916c027733554d5e", | |
| "version_major": 2, | |
| "version_minor": 0 | |
| }, | |
| "text/plain": [ | |
| "SiteStatsFingerprint: 0%| | 0/10 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>material_id</th>\n", | |
| " <th>formula</th>\n", | |
| " <th>nsites</th>\n", | |
| " <th>space_group</th>\n", | |
| " <th>volume</th>\n", | |
| " <th>structure</th>\n", | |
| " <th>elastic_anisotropy</th>\n", | |
| " <th>G_Reuss</th>\n", | |
| " <th>G_VRH</th>\n", | |
| " <th>G_Voigt</th>\n", | |
| " <th>...</th>\n", | |
| " <th>mean wt CN_20</th>\n", | |
| " <th>std_dev wt CN_20</th>\n", | |
| " <th>mean wt CN_21</th>\n", | |
| " <th>std_dev wt CN_21</th>\n", | |
| " <th>mean wt CN_22</th>\n", | |
| " <th>std_dev wt CN_22</th>\n", | |
| " <th>mean wt CN_23</th>\n", | |
| " <th>std_dev wt CN_23</th>\n", | |
| " <th>mean wt CN_24</th>\n", | |
| " <th>std_dev wt CN_24</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>mp-10003</td>\n", | |
| " <td>Nb4CoSi</td>\n", | |
| " <td>12</td>\n", | |
| " <td>124</td>\n", | |
| " <td>194.419802</td>\n", | |
| " <td>[[0.94814328 2.07280467 2.5112 ] Nb, [5.273...</td>\n", | |
| " <td>0.030688</td>\n", | |
| " <td>96.844535</td>\n", | |
| " <td>97.141604</td>\n", | |
| " <td>97.438674</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>mp-10010</td>\n", | |
| " <td>Al(CoSi)2</td>\n", | |
| " <td>5</td>\n", | |
| " <td>164</td>\n", | |
| " <td>61.987320</td>\n", | |
| " <td>[[0. 0. 0.] Al, [1.96639263 1.13529553 0.75278...</td>\n", | |
| " <td>0.266910</td>\n", | |
| " <td>93.939650</td>\n", | |
| " <td>96.252006</td>\n", | |
| " <td>98.564362</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0.000255</td>\n", | |
| " <td>0.000509</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>mp-10015</td>\n", | |
| " <td>SiOs</td>\n", | |
| " <td>2</td>\n", | |
| " <td>221</td>\n", | |
| " <td>25.952539</td>\n", | |
| " <td>[[1.480346 1.480346 1.480346] Si, [0. 0. 0.] Os]</td>\n", | |
| " <td>0.756489</td>\n", | |
| " <td>120.962289</td>\n", | |
| " <td>130.112955</td>\n", | |
| " <td>139.263621</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>mp-10021</td>\n", | |
| " <td>Ga</td>\n", | |
| " <td>4</td>\n", | |
| " <td>63</td>\n", | |
| " <td>76.721433</td>\n", | |
| " <td>[[0. 1.09045794 0.84078375] Ga, [0. ...</td>\n", | |
| " <td>2.376805</td>\n", | |
| " <td>12.205989</td>\n", | |
| " <td>15.101901</td>\n", | |
| " <td>17.997812</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>mp-10025</td>\n", | |
| " <td>SiRu2</td>\n", | |
| " <td>12</td>\n", | |
| " <td>62</td>\n", | |
| " <td>160.300999</td>\n", | |
| " <td>[[1.0094265 4.24771709 2.9955487 ] Si, [3.028...</td>\n", | |
| " <td>0.196930</td>\n", | |
| " <td>100.110773</td>\n", | |
| " <td>101.947798</td>\n", | |
| " <td>103.784823</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>mp-10037</td>\n", | |
| " <td>AlCo3C</td>\n", | |
| " <td>5</td>\n", | |
| " <td>221</td>\n", | |
| " <td>51.574959</td>\n", | |
| " <td>[[0. 0. 0.] Al, [0. 1.861157 1.861157] C...</td>\n", | |
| " <td>0.420936</td>\n", | |
| " <td>111.795761</td>\n", | |
| " <td>116.501644</td>\n", | |
| " <td>121.207527</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>mp-10063</td>\n", | |
| " <td>CdSnSb2</td>\n", | |
| " <td>16</td>\n", | |
| " <td>122</td>\n", | |
| " <td>580.176940</td>\n", | |
| " <td>[[0. 3.322252 9.855906] Cd, [0. 0....</td>\n", | |
| " <td>0.629264</td>\n", | |
| " <td>16.692188</td>\n", | |
| " <td>17.742410</td>\n", | |
| " <td>18.792631</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>mp-101</td>\n", | |
| " <td>Ir</td>\n", | |
| " <td>4</td>\n", | |
| " <td>225</td>\n", | |
| " <td>58.258386</td>\n", | |
| " <td>[[0. 0. 0.] Ir, [0. 1.938308 1.938308] I...</td>\n", | |
| " <td>0.174540</td>\n", | |
| " <td>212.791803</td>\n", | |
| " <td>216.505869</td>\n", | |
| " <td>220.219935</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>mp-10125</td>\n", | |
| " <td>SbIr</td>\n", | |
| " <td>4</td>\n", | |
| " <td>194</td>\n", | |
| " <td>80.054967</td>\n", | |
| " <td>[[2.03154403 1.17291049 4.19959275] Sb, [-2.03...</td>\n", | |
| " <td>0.591712</td>\n", | |
| " <td>55.983343</td>\n", | |
| " <td>59.245410</td>\n", | |
| " <td>62.507476</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>mp-10154</td>\n", | |
| " <td>MnSbIr</td>\n", | |
| " <td>12</td>\n", | |
| " <td>216</td>\n", | |
| " <td>228.873769</td>\n", | |
| " <td>[[3.0584545 0. 0. ] Mn, [3.058454...</td>\n", | |
| " <td>0.109313</td>\n", | |
| " <td>39.747198</td>\n", | |
| " <td>40.181671</td>\n", | |
| " <td>40.616145</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>10 rows × 139 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " material_id formula nsites space_group volume \\\n", | |
| "0 mp-10003 Nb4CoSi 12 124 194.419802 \n", | |
| "1 mp-10010 Al(CoSi)2 5 164 61.987320 \n", | |
| "2 mp-10015 SiOs 2 221 25.952539 \n", | |
| "3 mp-10021 Ga 4 63 76.721433 \n", | |
| "4 mp-10025 SiRu2 12 62 160.300999 \n", | |
| "5 mp-10037 AlCo3C 5 221 51.574959 \n", | |
| "6 mp-10063 CdSnSb2 16 122 580.176940 \n", | |
| "7 mp-101 Ir 4 225 58.258386 \n", | |
| "8 mp-10125 SbIr 4 194 80.054967 \n", | |
| "9 mp-10154 MnSbIr 12 216 228.873769 \n", | |
| "\n", | |
| " structure elastic_anisotropy \\\n", | |
| "0 [[0.94814328 2.07280467 2.5112 ] Nb, [5.273... 0.030688 \n", | |
| "1 [[0. 0. 0.] Al, [1.96639263 1.13529553 0.75278... 0.266910 \n", | |
| "2 [[1.480346 1.480346 1.480346] Si, [0. 0. 0.] Os] 0.756489 \n", | |
| "3 [[0. 1.09045794 0.84078375] Ga, [0. ... 2.376805 \n", | |
| "4 [[1.0094265 4.24771709 2.9955487 ] Si, [3.028... 0.196930 \n", | |
| "5 [[0. 0. 0.] Al, [0. 1.861157 1.861157] C... 0.420936 \n", | |
| "6 [[0. 3.322252 9.855906] Cd, [0. 0.... 0.629264 \n", | |
| "7 [[0. 0. 0.] Ir, [0. 1.938308 1.938308] I... 0.174540 \n", | |
| "8 [[2.03154403 1.17291049 4.19959275] Sb, [-2.03... 0.591712 \n", | |
| "9 [[3.0584545 0. 0. ] Mn, [3.058454... 0.109313 \n", | |
| "\n", | |
| " G_Reuss G_VRH G_Voigt ... mean wt CN_20 std_dev wt CN_20 \\\n", | |
| "0 96.844535 97.141604 97.438674 ... 0.000000 0.000000 \n", | |
| "1 93.939650 96.252006 98.564362 ... 0.000255 0.000509 \n", | |
| "2 120.962289 130.112955 139.263621 ... 0.000000 0.000000 \n", | |
| "3 12.205989 15.101901 17.997812 ... 0.000000 0.000000 \n", | |
| "4 100.110773 101.947798 103.784823 ... 0.000000 0.000000 \n", | |
| "5 111.795761 116.501644 121.207527 ... 0.000000 0.000000 \n", | |
| "6 16.692188 17.742410 18.792631 ... 0.000000 0.000000 \n", | |
| "7 212.791803 216.505869 220.219935 ... 0.000000 0.000000 \n", | |
| "8 55.983343 59.245410 62.507476 ... 0.000000 0.000000 \n", | |
| "9 39.747198 40.181671 40.616145 ... 0.000000 0.000000 \n", | |
| "\n", | |
| " mean wt CN_21 std_dev wt CN_21 mean wt CN_22 std_dev wt CN_22 \\\n", | |
| "0 0.0 0.0 0.0 0.0 \n", | |
| "1 0.0 0.0 0.0 0.0 \n", | |
| "2 0.0 0.0 0.0 0.0 \n", | |
| "3 0.0 0.0 0.0 0.0 \n", | |
| "4 0.0 0.0 0.0 0.0 \n", | |
| "5 0.0 0.0 0.0 0.0 \n", | |
| "6 0.0 0.0 0.0 0.0 \n", | |
| "7 0.0 0.0 0.0 0.0 \n", | |
| "8 0.0 0.0 0.0 0.0 \n", | |
| "9 0.0 0.0 0.0 0.0 \n", | |
| "\n", | |
| " mean wt CN_23 std_dev wt CN_23 mean wt CN_24 std_dev wt CN_24 \n", | |
| "0 0.0 0.0 0.0 0.0 \n", | |
| "1 0.0 0.0 0.0 0.0 \n", | |
| "2 0.0 0.0 0.0 0.0 \n", | |
| "3 0.0 0.0 0.0 0.0 \n", | |
| "4 0.0 0.0 0.0 0.0 \n", | |
| "5 0.0 0.0 0.0 0.0 \n", | |
| "6 0.0 0.0 0.0 0.0 \n", | |
| "7 0.0 0.0 0.0 0.0 \n", | |
| "8 0.0 0.0 0.0 0.0 \n", | |
| "9 0.0 0.0 0.0 0.0 \n", | |
| "\n", | |
| "[10 rows x 139 columns]" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# example of computing the features for structures in a dataframe\n", | |
| "# add CrystalNNFingerprint to the dataframe \n", | |
| "df_head = ssf.featurize_dataframe(df.head(10), \"structure\")\n", | |
| "# df[['material_id', 'formula']].head(2)\n", | |
| "df_head" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "id": "3beecafa-5a9b-4bf5-bc7a-c71769d8ac77", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "139" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "len(df_head.columns)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "id": "1bd0fdec-c41b-45d2-a46b-3d9e4ad81d88", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])" | |
| ] | |
| }, | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "from sklearn.metrics.pairwise import paired_cosine_distances\n", | |
| "paired_cosine_distances(df_head.iloc[:, -122:].values, df_head.iloc[:, -122:].values)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "id": "5da6e26e-c8b5-4a4d-87df-21f9d1cba9ac", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "mp-10003 mp-10037 0.962867\n", | |
| "mp-10010 mp-10063 0.997863\n", | |
| "mp-10015 mp-101 1.000000\n", | |
| "mp-10021 mp-10125 0.635249\n", | |
| "mp-10025 mp-10154 0.564862\n", | |
| "dtype: float64" | |
| ] | |
| }, | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# compute the cosine distance between all pairs of structures\n", | |
| "all_distances = {}\n", | |
| "for i in range(5, len(df_head)):\n", | |
| " idxs_to_compare = list(range(i, len(df_head))) + list(range(i))\n", | |
| " distances = paired_cosine_distances(df_head.iloc[:, -122:].values, df_head.iloc[idxs_to_compare, -122:].values)\n", | |
| " labels = zip(df_head.material_id.values, df_head.iloc[idxs_to_compare].material_id.values)\n", | |
| " labels = [tuple(sorted(pair)) for pair in labels]\n", | |
| " all_distances.update({labels[j]: distances[j] for j in range(len(labels))})\n", | |
| "\n", | |
| "all_distances = pd.Series(all_distances)\n", | |
| "all_distances.head(5)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "id": "f8c4016d-5eb7-43a8-8bd1-9035d04ddea0", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "<AxesSubplot:>" | |
| ] | |
| }, | |
| "execution_count": 12, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAD4CAYAAAD1jb0+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQyUlEQVR4nO3df4zkd13H8efbOxraW7iWXBlJi+xpsFKuYrixgiQ4S0XXUkRME1sV26a6UQM2pgiHRmtiiDUEBYNILtAcBNI1tgWhBaRpGaqmLe6Wa/fK8aMCljuE2lQOtzTBK2//2KGc292d736/352Zz/l8JJvufGe+83l9dmZf/e53vt/vRWYiSSrPD4w7gCSpHgtckgplgUtSoSxwSSqUBS5Jhdo+ysF27dqV09PToxwSgEcffZQdO3aMfNy2lJ4fyp+D+cer9PzQbA6Li4sPZ+aZq5ePtMCnp6dZWFgY5ZAA9Pt9er3eyMdtS+n5ofw5mH+8Ss8PzeYQEf++1nJ3oUhSoSxwSSqUBS5JhbLAJalQFrgkFcoCl6RCWeCSVCgLXJIKZYFLUqFGeiamJJVqet8tjdY/MNv+pQDcApekQlngklQoC1ySCmWBS1KhLHBJKpQFLkmFssAlqVAWuCQVygKXpEINLfCIuC4iHoqIQ2vc9/qIyIjYtTXxJEnrqbIFfgCYXb0wIp4NvBx4sOVMkqQKhhZ4Zt4BPLLGXX8FvAHItkNJkoartQ88In4ROJqZ97acR5JUUWQO34COiGng5szcExGnAZ8Efi4zj0XEV4BuZj68zrpzwBxAp9PZOz8/31b2ypaXl5mamhr5uG0pPT+UPwfzj9ck5F86eqzR+rt3bqs9h5mZmcXM7K5eXqfAzwNuA749uPts4GvA+Zn59Y2ep9vt5sLCwmazN9bv9+n1eiMfty2l54fy52D+8ZqE/G1cTrbuHCJizQLf9PXAM3MJeOYJT/wVNtgClyRtjSqHEV4P3AmcExFHIuLKrY8lSRpm6BZ4Zl465P7p1tJIkirzTExJKpQFLkmFssAlqVAWuCQVygKXpEJZ4JJUKAtckgplgUtSoSxwSSqUBS5JhbLAJalQFrgkFcoCl6RCWeCSVCgLXJIKZYFLUqEscEkqlAUuSYWywCWpUFX+UePrIuKhiDh0wrK3RMTnIuK+iPhgRJy+pSklSU9SZQv8ADC7atmtwJ7M/HHgC8CbWs4lSRpiaIFn5h3AI6uWfSIzjw9u3gWcvQXZJEkbiMwc/qCIaeDmzNyzxn0fAf4uM9+/zrpzwBxAp9PZOz8/3yhwHcvLy0xNTY183LaUnh/Kn4P5x2sS8i8dPdZo/d07t9Wew8zMzGJmdlcv394kUET8EXAc+MB6j8nM/cB+gG63m71er8mQtfT7fcYxbltKzw/lz8H84zUJ+S/fd0uj9Q/M7mh9DrULPCIuAy4CLsgqm/GSpFbVKvCImAXeCPxMZn673UiSpCqqHEZ4PXAncE5EHImIK4F3AE8Dbo2IgxHxri3OKUlaZegWeGZeusbi92xBFknSJngmpiQVygKXpEJZ4JJUKAtckgplgUtSoSxwSSqUBS5JhbLAJalQFrgkFcoCl6RCWeCSVCgLXJIKZYFLUqEscEkqlAUuSYWywCWpUBa4JBXKApekQlngklSoKv+o8XUR8VBEHDph2TMi4taI+OLgv2dsbUxJ0mpVtsAPALOrlu0DbsvM5wK3DW5LkkZoaIFn5h3AI6sWvwp47+D79wK/1G4sSdIwkZnDHxQxDdycmXsGt7+ZmaefcP9/Zeaau1EiYg6YA+h0Onvn5+dbiL05y8vLTE1NjXzctpSeH8qfg/nHaxLyLx091mj93Tu31Z7DzMzMYmZ2Vy/f3ihRBZm5H9gP0O12s9frbfWQT9Lv9xnHuG0pPT+UPwfzj9ck5L983y2N1j8wu6P1OdQ9CuUbEfEsgMF/H2ovkiSpiroF/mHgssH3lwH/0E4cSVJVVQ4jvB64EzgnIo5ExJXAtcDLI+KLwMsHtyVJIzR0H3hmXrrOXRe0nEWStAmeiSlJhbLAJalQFrgkFcoCl6RCWeCSVCgLXJIKZYFLUqEscEkqlAUuSYWywCWpUBa4JBXKApekQlngklQoC1ySCmWBS1KhLHBJKpQFLkmFssAlqVCNCjwifj8i7o+IQxFxfUQ8ta1gkqSN1S7wiDgL+D2gm5l7gG3AJW0FkyRtrOkulO3AqRGxHTgN+FrzSJKkKiIz668ccRXwZuAx4BOZ+WtrPGYOmAPodDp75+fna49X1/LyMlNTUyMfty2l54fy52D+8ZqE/EtHjzVaf/fObbXnMDMzs5iZ3dXLaxd4RJwB3Aj8CvBN4O+BGzLz/eut0+12c2FhodZ4TfT7fXq93sjHbUvp+aH8OZh/vCYh//S+Wxqtf2B2R+05RMSaBd5kF8rPAl/OzP/MzP8BbgJ+usHzSZI2oUmBPwi8KCJOi4gALgAOtxNLkjRM7QLPzLuBG4B7gKXBc+1vKZckaYjtTVbOzGuAa1rKIknaBM/ElKRCWeCSVCgLXJIKZYFLUqEscEkqlAUuSYWywCWpUBa4JBXKApekQlngklQoC1ySCmWBS1KhLHBJKpQFLkmFssAlqVAWuCQVygKXpEJZ4JJUKAtckgrVqMAj4vSIuCEiPhcRhyPixW0FkyRtrNE/agy8Hfh4Zl4cEacAp7WQSZJUQe0Cj4inAy8FLgfIzO8A32knliRpmMjMeitG/ASwH/gs8AJgEbgqMx9d9bg5YA6g0+nsnZ+fb5K3luXlZaampkY+bltKzw/lz8H84zUJ+ZeOHmu0/u6d22rPYWZmZjEzu6uXNynwLnAX8JLMvDsi3g58KzP/eL11ut1uLiws1BqviX6/T6/XG/m4bSk9P5Q/B/OP1yTkn953S6P1D8zuqD2HiFizwJt8iHkEOJKZdw9u3wC8sMHzSZI2oXaBZ+bXga9GxDmDRRewsjtFkjQCTY9CeR3wgcERKF8CrmgeSZJURaMCz8yDwJP2y0iStp5nYkpSoSxwSSqUBS5JhbLAJalQFrgkFcoCl6RCWeCSVCgLXJIKZYFLUqEscEkqlAUuSYWywCWpUBa4JBXKApekQlngklQoC1ySCmWBS1KhLHBJKlTjAo+IbRHxmYi4uY1AkqRq2tgCvwo43MLzSJI2oVGBR8TZwCuAd7cTR5JUVdMt8LcBbwC+2zyKJGkzIjPrrRhxEXBhZv5uRPSA12fmRWs8bg6YA+h0Onvn5+frp61peXmZqampkY/bltLyLx099qRlnVPhG48NX/e8s3ZuQaLmhr0Ga825qlHMubT30Gpt5W/yOjW1e+e22nOYmZlZzMzu6uVNCvzPgdcAx4GnAk8HbsrMX19vnW63mwsLC7XGa6Lf79Pr9UY+bltKyz+975YnLbv6vOO8dWn70HW/cu0rtiJSY8Neg7XmXNUo5lzae2i1tvI3eZ2aOjC7o/YcImLNAq+9CyUz35SZZ2fmNHAJcPtG5S1JapfHgUtSoYb/TVtBZvaBfhvPJUmqxi1wSSqUBS5JhbLAJalQFrgkFcoCl6RCWeCSVCgLXJIKZYFLUqFaOZFH65v0a2RIdTS9pojv7Xa4BS5JhbLAJalQFrgkFcoCl6RCWeCSVCgLXJIKZYFLUqEscEkqlAUuSYWywCWpULULPCKeHRGfjIjDEXF/RFzVZjBJ0saaXAvlOHB1Zt4TEU8DFiPi1sz8bEvZJEkbqL0Fnpn/kZn3DL7/b+AwcFZbwSRJG4vMbP4kEdPAHcCezPzWqvvmgDmATqezd35+vvF4m7W8vMzU1NTIxwVYOnqs9rrnnbUTqJe/ybhboXMqfOOx4Y/73pwnzbDXoI3XuY6q41b9+Y/KZufc1u/wOH8vdu/cVnsOMzMzi5nZXb28cYFHxBTwKeDNmXnTRo/tdru5sLDQaLw6+v0+vV5v5ONCO5eTrZO/6eU+23b1ecd569LwPXaTepnRYa/BuC4bXHXcqj//UdnsnNv6HR7n78WB2R215xARaxZ4o6NQIuIpwI3AB4aVtySpXU2OQgngPcDhzPzL9iJJkqposgX+EuA1wMsi4uDg68KWckmShqi9Uywz/xmIFrNIkjbBMzElqVAWuCQVygKXpEJZ4JJUKAtckgplgUtSoSxwSSqUBS5JhZqcq9sM0eQiNFefd5zLx3SxIW3OuC425GusErkFLkmFssAlqVAWuCQVygKXpEJZ4JJUKAtckgplgUtSoSxwSSqUBS5JhbLAJalQjQo8ImYj4vMR8UBE7GsrlCRpuNoFHhHbgL8BfgE4F7g0Is5tK5gkaWNNtsDPBx7IzC9l5neAeeBV7cSSJA0TmVlvxYiLgdnM/M3B7dcAP5WZr131uDlgbnDzHODz9ePWtgt4eAzjtqX0/FD+HMw/XqXnh2ZzeE5mnrl6YZPLycYay570f4PM3A/sbzBOYxGxkJndcWZoovT8UP4czD9epeeHrZlDk10oR4Bnn3D7bOBrzeJIkqpqUuD/Cjw3InZHxCnAJcCH24klSRqm9i6UzDweEa8F/hHYBlyXmfe3lqxdY92F04LS80P5czD/eJWeH7ZgDrU/xJQkjZdnYkpSoSxwSSrUSVXgVU/tj4ifjIjHB8eyT4xh+SOiFxHHIuLg4OtPxpFzPVV+/oM5HIyI+yPiU6POOEyF1+APTvj5Hxq8j54xjqxrqZB/Z0R8JCLuHbwGV4wj53oq5D8jIj4YEfdFxKcjYs84cq4nIq6LiIci4tA690dE/PVgfvdFxAsbDZiZJ8UXKx+k/hvww8ApwL3Aues87nbgo8DF4869mfxAD7h53Fkb5D8d+CzwQ4Pbzxx37jrvoRMe/0rg9nHn3uRr8IfAXwy+PxN4BDhl3Nk3kf8twDWD738MuG3cuVfleynwQuDQOvdfCHyMlfNoXgTc3WS8k2kLvOqp/a8DbgQeGmW4Ckq/NEGV/L8K3JSZDwJkZumvwaXA9SNJVk2V/Ak8LSICmGKlwI+PNua6quQ/F7gNIDM/B0xHRGe0MdeXmXew8jNdz6uA9+WKu4DTI+JZdcc7mQr8LOCrJ9w+Mlj2hIg4C3g18K4R5qpqaP6BFw/+/P1YRDx/NNEqqZL/R4EzIqIfEYsR8RsjS1dN1deAiDgNmGVlY2BSVMn/DuB5rJx0twRclZnfHU28oarkvxf4ZYCIOB94DisnEZai8nusiian0k+aKqf2vw14Y2Y+vrIBMlGq5L+HlWsiLEfEhcCHgOdudbCKquTfDuwFLgBOBe6MiLsy8wtbHa6iSpeHGHgl8C+ZudHW1qhVyf/zwEHgZcCPALdGxD9l5re2OFsVVfJfC7w9Ig6y8j+gzzA5f0FUsZn32FAnU4FXObW/C8wPynsXcGFEHM/MD40k4caG5j/xlywzPxoR74yIXZk5CRf5qfLzPwI8nJmPAo9GxB3AC4BJKfDNXB7iEiZr9wlUy38FcG2u7JB9ICK+zMq+5E+PJuKGqv4OXAErHwgCXx58laLdS5CMe6d/ix8ebAe+BOzm+x+APH+Dxx9gsj7EHJof+EG+f/LV+cCD37s97q+K+Z/Hyv7L7cBpwCFgz7izb/Y9BOxkZT/njnFnrvEa/C3wp4PvO8BRYNe4s28i/+kMPnQFfouV/cljz74q4zTrf4j5Cv7vh5ifbjLWSbMFnuuc2h8Rvz24fxL3ez+hYv6Lgd+JiOPAY8AlOXhXjFuV/Jl5OCI+DtwHfBd4d2auebjVOGziPfRq4BO58pfExKiY/8+AAxGxxEqJvDEn4y+4qvmfB7wvIh5n5YimK8cWeA0RcT0rR4vtiogjwDXAU+CJ/B9l5UiUB4BvM/hrovZ4E/L7L0napJPpKBRJ+n/FApekQlngklQoC1ySCmWBS1KhLHBJKpQFLkmF+l9kZo10UZJD4QAAAABJRU5ErkJggg==\n", | |
| "text/plain": [ | |
| "<Figure size 432x288 with 1 Axes>" | |
| ] | |
| }, | |
| "metadata": { | |
| "needs_background": "light" | |
| }, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "all_distances.hist(bins=20)" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.7.10" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment