2014年10月21日 星期二

Binary Log File (hex) to TSV

/*
 *  This program converts a binary log file to a TSV file
 *  (using tabs to split the columns).
 *  Usage:
 *       $ scalac ProberLogTSV.scala
 *       $ scala -classpath . PorberLogtoTSV /home/hduser/log00001.dat
 *  Result:
 *       $ cat /home/hduser/testfile.tsv
 *
 */
import scala.io.Source
import scala.io.Codec

import java.nio.charset.CodingErrorAction
import java.nio.file.{Paths, Files}
import java.nio.charset.StandardCharsets
import java.io._
import java.lang._

/**
 * Converts a binary log file whose records are separated by NUL bytes into a
 * tab-separated (TSV) text file.
 *
 * Usage: `scala -classpath . PorberLogtoTSV <binary-log-file> [output-tsv-file]`
 *
 * When the output path is omitted the result is written to
 * `/home/hduser/testfile.tsv`.
 */
object PorberLogtoTSV {

  /** Output path used when no second command-line argument is supplied. */
  private val DefaultOutputPath = "/home/hduser/testfile.tsv"

  /** At most this many spaces per line (counted from the left) become tabs. */
  private val MaxTabsPerLine = 3

  /** The NUL character that terminates each record in the binary log. */
  private val NulChar = '\u0000'

  /**
   * Program entry point.
   *
   * @param args `args(0)` is the binary log file to read; the optional
   *             `args(1)` is the TSV file to write.
   */
  def main(args: Array[String]): Unit = {
    if (args.isEmpty) {
      // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
      System.err.println("Usage: PorberLogtoTSV <binary-log-file> [output-tsv-file]")
      sys.exit(1)
    }
    val logFile = args(0)
    val outputPath = if (args.length > 1) args(1) else DefaultOutputPath

    // Convert the binary file to text with one record per line.
    val logData = binaryToString(logFile)

    // Convert the text to tab-separated form.
    val tabFile = convertToTabFile(logData)

    Files.write(Paths.get(outputPath), tabFile.getBytes(StandardCharsets.UTF_8))
  }

  /**
   * Reads a file as ISO-8859-1 text (one character per byte) and replaces
   * every NUL character with a newline so that records become lines.
   *
   * @param logFile path of the binary log file
   * @return the file content with each NUL replaced by `'\n'`
   */
  def binaryToString(logFile: String): String = {
    val src = scala.io.Source.fromFile(logFile, "ISO-8859-1")
    // Close the source even when reading fails.
    try src.map(c => if (c == NulChar) '\n' else c).mkString
    finally src.close()
  }

  /**
   * Replaces the first three spaces of every line with tabs and terminates
   * every line with `'\n'`.
   *
   * @param logData text whose lines are separated by `'\n'`
   * @return the converted text; trailing empty lines of the input are dropped
   */
  def convertToTabFile(logData: String): String = {
    // Split on '\n' (what binaryToString emits), not the platform separator,
    // so the result does not depend on the operating system.
    val lines = logData.split("\n")
    lines.map(line => tabifyLeadingSpaces(line) + "\n").mkString
  }

  /** Turns the first `MaxTabsPerLine` spaces of `line` into tabs. */
  private def tabifyLeadingSpaces(line: String): String =
    // A positive limit splits at most (limit - 1) times and keeps empty
    // fields, so re-joining with tabs changes exactly the first three spaces.
    line.split(" ", MaxTabsPerLine + 1).mkString("\t")

}

沒有留言:

張貼留言