Skip to content

Instantly share code, notes, and snippets.

@tuolumne
Last active December 23, 2015 19:59
Show Gist options
  • Select an option

  • Save tuolumne/6686732 to your computer and use it in GitHub Desktop.

Select an option

Save tuolumne/6686732 to your computer and use it in GitHub Desktop.
Added Tika integration - Still need to make the file path dynamic.
<dataConfig>
<!-- Unnamed JDBC source: MySQL database "tobacco" on localhost:3306.
     The SQL entities in this file carry no dataSource attribute of their own.
     NOTE(review): credentials are stored here in plain text; confirm that is acceptable for this deployment. -->
<dataSource type="JdbcDataSource"
            driver="com.mysql.jdbc.Driver"
            url="jdbc:mysql://localhost:3306/tobacco"
            user="tobacco"
            password="tobacco"/>
<!-- Binary file source; the "ocr" entity refers to it by the name "bin-file-ds". -->
<dataSource name="bin-file-ds"
            type="BinFileDataSource"/>
<script><![CDATA[
/**
 * Script transformer attached to the "opt" entity (transformer="script:addfield").
 *
 * Each incoming row holds one code/value pair in the columns 'code' and 'val'.
 * The pair is pivoted into a column named after the code, so that
 * row[code] = val. Values of the date codes (dd, ddu, dl, ddprod, ddship)
 * are first converted by myFunction().
 *
 * @param row row object exposing get(key) and put(key, value).
 * @return the same row object, with the pivoted column added when 'code' is
 *         present.
 */
function addfield(row){
    var fieldName = row.get('code');
    var val = row.get('val');

    // Without a code there is no column name to write to; leave the row alone
    // rather than storing the value under a null key.
    if (fieldName == null) {
        return row;
    }

    // Codes whose values are packed yyyyMMdd dates.
    var dateFields = { dd: true, ddu: true, dl: true, ddprod: true, ddship: true };

    // Only convert real values: a missing value stays missing instead of
    // being handed to the date converter.
    if (val != null && dateFields[String(fieldName)] === true) {
        val = myFunction(val);
    }

    row.put(fieldName, val);
    return row;
}
/**
 * Convert a packed yyyyMMdd date into an ISO-8601 UTC timestamp string.
 *
 * @param date value containing a yyyyMMdd date (string or number; it is
 *             converted to a string before matching).
 * @return the ISO string for midnight UTC of that day, e.g.
 *         "2013-05-15T00:00:00.000Z"; or null when the value is missing,
 *         contains no yyyyMMdd date, has an all-zero year, month or day
 *         component, or names a day that does not exist (e.g. Feb 31).
 */
function myFunction(date)
{
    if (date == null) {
        return null;
    }

    // Groups: 1 = year (4 digits), 2 = month (00-12), 3 = day (2 digits).
    var patt = /(\d\d\d\d)([0][0-9]|[1][0-2])(\d\d)/;
    var match = patt.exec(String(date));
    if (match === null) {
        // Not a date at all: report "no value" instead of throwing.
        return null;
    }

    // A zero component marks an unknown/partial date; treat it as no date.
    if (match[1] == '0000' || match[2] == '00' || match[3] == '00') {
        return null;
    }

    var year = parseInt(match[1], 10);
    var monthIndex = parseInt(match[2], 10) - 1;   // JS months are 0-based
    var day = parseInt(match[3], 10);

    // Build the date in UTC so the calendar day does not shift with the
    // server's time zone when it is serialised by toISOString().
    // setUTCFullYear is used (rather than the Date constructor) so that
    // years 0001-0099 are not remapped to 1901-1999.
    var d = new Date(0);
    d.setUTCFullYear(year, monthIndex, day);

    // Out-of-range days roll over into the next month; reject them.
    if (d.getUTCMonth() !== monthIndex || d.getUTCDate() !== day) {
        return null;
    }
    return d.toISOString();
}
]]></script>
<!--
<script language="groovy"><![CDATA[
def addfield(HashMap<String,Object>row, org.apache.solr.handler.dataimport.ContextImpl context ) {
return row
}
]]>
</script>
-->
<document>
  <!-- Root entity: one row per "mand" record joined to its collection.
       a.itid is selected because the nested "opt" entity filters on
       ${tobaccoitem.itid}; a column that is not in this result set resolves
       to an empty value there.
       NOTE(review): the query is restricted to collection_id=5, dm>20130500
       and LIMIT 200; this looks like a development subset, confirm before a full import.
       NOTE(review): deltaQuery returns itid, but no pk or deltaImportQuery is
       declared on this entity; confirm delta imports work as intended. -->
  <entity name="tobaccoitem"
          query="select a.itid,a.tid,a.bates,b.code,b.name from mand a join collection b on a.collection_id=b.id where collection_id=5 and dm>20130500 LIMIT 200"
          deltaQuery="select itid from mand where last_updated > '${dataimporter.last_index_time}'">
    <field column="tid" name="tid" />
    <field column="bates" name="bates" />
    <!--<field column="ddu" name="ddu" dateTimeFormat="yyyyMMdd" locale="en" /> -->
    <field column="code" name="cn" />

    <!-- Code/value pairs for the current item. The addfield script pivots
         each pair into a column named after the code.
         NOTE(review): parentDeltaQuery reads table "item" and ${opt.itid},
         neither of which is selected or used elsewhere in this file; verify against the schema. -->
    <entity name="opt"
            transformer="script:addfield"
            query="select b.code as code ,a.value as val from opt a join field b on a.itag=b.id where itid='${tobaccoitem.itid}'"
            deltaQuery="select b.code,a.value from opt a join field b on a.itag=b.id where itid='${tobaccoitem.itid}'"
            parentDeltaQuery="select id from item where ID=${opt.itid}" >
    </entity>

    <!-- OCR text extracted through Tika and indexed as field "ot".
         TODO: the url is a single hard-coded file, so every item receives the
         same text; derive the path from the parent row. -->
    <entity name="ocr"
            processor="TikaEntityProcessor"
            url="/home/sven/release/current/images/z/n/x/znx20j00/znx20j00.ocr"
            dataSource="bin-file-ds"
            format="text">
      <field column="text" name="ot" />
    </entity>
  </entity>
</document>
</dataConfig>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment