Last active
December 23, 2015 19:59
-
-
Save tuolumne/6686732 to your computer and use it in GitHub Desktop.
Added Tika integration - Still need to make the file path dynamic.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <!-- Solr DataImportHandler configuration: declares the data sources, the script transformer and the document/entity mapping used for the import. --> | |
| <dataConfig> | |
| <!-- Unnamed JDBC source: MySQL database "tobacco" on localhost:3306. NOTE(review): user and password are stored here in plain text. --> | |
| <dataSource type="JdbcDataSource" driver="com.mysql.jdbc.Driver" url="jdbc:mysql://localhost:3306/tobacco" user="tobacco" password="tobacco"/> | |
| <!-- Binary file source; referenced by name from the "ocr" entity (dataSource="bin-file-ds"). --> | |
| <dataSource type="BinFileDataSource" name="bin-file-ds"/> | |
| <script><![CDATA[ | |
/**
 * Script transformer for the "opt" entity (transformer="script:addfield").
 *
 * Each incoming row carries a field code ('code') and a raw value ('val').
 * The value is copied onto the row under a key equal to the code, so every
 * code/value pair becomes a field of its own. Values of the date-type codes
 * (dd, ddu, dl, ddprod, ddship) are first converted by myFunction().
 *
 * @param row  map-like row object exposing get(key) and put(key, value)
 * @return     the same row object, with the extra field added
 */
function addfield(row) {
    var fieldName = row.get('code');
    var val = row.get('val');

    // Loose == is deliberate: under a Java scripting engine row.get() may hand
    // back a Java string object, which a strict === against a JS string literal
    // would not match.
    var isDateField = fieldName == 'dd'
        || fieldName == 'ddu'
        || fieldName == 'dl'
        || fieldName == 'ddprod'
        || fieldName == 'ddship';

    // A missing value has nothing to convert; skipping the call keeps a null
    // from reaching the date parser and aborting the import for this row.
    if (isDateField && val != null) {
        val = myFunction(val);
    }

    row.put(fieldName, val);
    return row;
}
/**
 * Convert a packed yyyyMMdd date value into an ISO-8601 UTC timestamp string
 * (midnight UTC of that calendar day, e.g. '2013-05-01T00:00:00.000Z').
 *
 * Returns null when the value is missing, when it does not contain a
 * yyyyMMdd sequence with a month of 00-12, or when the year is '0000' or the
 * month or day is '00' (partial/unknown dates are dropped, not guessed).
 *
 * Day numbers beyond the end of the month are not rejected; they roll over
 * into the following month according to normal Date arithmetic.
 *
 * @param date  raw value; coerced to a string before matching
 * @return      ISO-8601 string, or null if no usable date is present
 */
function myFunction(date)
{
    if (date == null) {
        return null;
    }

    var patt = /(\d\d\d\d)([0][0-9]|[1][0-2])(\d\d)/i;
    var match = patt.exec(String(date));
    if (!match) {
        // exec() yields null on no match; indexing it would throw.
        return null;
    }

    var year = match[1];
    var month = match[2];
    var day = match[3];

    // Any zeroed component marks the date as unknown. (The original code had
    // further branches defaulting a 00 day/month to 1, but they could never
    // run because this check already returned; behaviour is unchanged.)
    if (year == '0000' || month == '00' || day == '00') {
        return null;
    }

    // Build the instant in UTC. Constructing a local-time Date and then
    // calling toISOString() shifts the calendar day on servers whose time
    // zone is not UTC. setUTCFullYear() is used instead of the Date
    // constructor so that years below 100 are not remapped to 19xx.
    var d = new Date(0);
    d.setUTCFullYear(parseInt(year, 10), parseInt(month, 10) - 1, parseInt(day, 10));
    return d.toISOString();
}
| ]]></script> | |
| <!-- | |
| <script language="groovy"><![CDATA[ | |
| def addfield(HashMap<String,Object>row, org.apache.solr.handler.dataimport.ContextImpl context ) { | |
| return row | |
| } | |
| ]]> | |
| </script> | |
| --> | |
| <document> | |
| <!-- Root entity: up to 200 rows of mand joined to collection, restricted to collection_id=5 and dm>20130500. --> | |
| <!-- deltaQuery returns the itid of rows whose last_updated is later than the last index time. --> | |
| <!-- NOTE(review): the child "opt" entity reads ${tobaccoitem.itid}, but this query's select list contains a.tid and no itid column; confirm which column is intended. --> | |
| <entity name="tobaccoitem" query="select a.tid,a.bates,b.code,b.name from mand a join collection b on a.collection_id=b.id where collection_id=5 and dm>20130500 LIMIT 200" | |
| deltaQuery="select itid from mand where last_updated > '${dataimporter.last_index_time}'"> | |
| <field column="tid" name="tid" /> | |
| <field column="bates" name="bates" /> | |
| <!--<field column="ddu" name="ddu" dateTimeFormat="yyyyMMdd" locale="en" /> --> | |
| <!-- The collection code column is indexed under the field name "cn". --> | |
| <field column="code" name="cn" /> | |
| <!-- Child entity: one row per code/value pair from opt joined to field; the addfield script copies each pair onto the row as a field named after the code. --> | |
| <!-- NOTE(review): this deltaQuery has no last_index_time filter, and parentDeltaQuery reads ${opt.itid} from a table "item" although neither query here selects itid; confirm against the schema. --> | |
| <entity name="opt" transformer="script:addfield" query="select b.code as code ,a.value as val from opt a join field b on a.itag=b.id where itid='${tobaccoitem.itid}'" | |
| deltaQuery="select b.code,a.value from opt a join field b on a.itag=b.id where itid='${tobaccoitem.itid}'" | |
| parentDeltaQuery="select id from item where ID=${opt.itid}" > | |
| </entity> | |
| <!-- OCR text read through the "bin-file-ds" source with TikaEntityProcessor; the extracted "text" column is indexed as "ot". --> | |
| <!-- TODO: the url is a fixed path to a single .ocr file, so every parent row receives the same text until the path is derived from the row. --> | |
| <entity name="ocr" processor="TikaEntityProcessor" url="/home/sven/release/current/images/z/n/x/znx20j00/znx20j00.ocr" dataSource="bin-file-ds" format="text"> | |
| <field column="text" name="ot" /> | |
| </entity> | |
| </entity> | |
| </document> | |
| </dataConfig> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment