Macs in Chemistry

Insanely Great Science

programming help

 

I'm looking at using Spectrophores for comparing molecules (https://openbabel.org/docs/dev/Fingerprints/spectrophore.html). There is a command-line tool for creating the fingerprints, and I've written the code below for calculating the Euclidean distance between the fingerprints.

The output from the obspectrophore tool for an individual record is shown below; "ID_000002" is the identifier of the molecule record, followed by the values of its fingerprint.

*******************************************
SPECTROPHORE(TM) CALCULATOR: OBSPECTROPHORE
*******************************************

Input file:       /Users/username/Desktop/SampleFiles/publishedfrags.sdf
Normalization:    No
Accuracy:         20 degrees
Stereo treatment: No
Resolution:       3 Angstrom

ID_000002   1.49549 2.12041 1.75615 3.2692  4.92872 3.76183 3.61444 3.72374 3.76777 6.28071 6.29508 3.72293 2.84116 2.85055 10.5207 10.5353 15.5128 13.9755 11.3767 20.0578 16.3018 16.4986 20.8431 15.5037 28.5218 31.3143 29.1957 24.1175 20.2475 20.7571 6.87729 23.1364 21.7477 26.0813 9.92146 25.4475 0.937131    0.87351 2.91767 2.9165  0.823157    0.845468    0.39668 3.15164 1.9814  0.923294    0.769458    3.01774

The file that I want to search will have many, many records.

My script is shown below. It works fine, but I'm wondering if I can speed it up, in particular the calculation of the Euclidean distance between the fingerprints. I've heard of the Accelerate framework but can't find much in terms of examples or tutorials of how to use it.

import AppKit
import PlaygroundSupport
import Foundation


// MARK: file locations

// Query molecule handed to obspectrophore.
var iFilepath: String = "/Users/username/Desktop/SampleFiles/selectedfrag.sdf"

// Spectrophore file that the query is searched against.
var mainFilePath: String = "/Users/username/Desktop/SampleFiles/publishedfrags.spectrophore"

// File the distance results are appended to.
var oFilePath: String = "/Users/username/Desktop/SampleFiles/selectedfrag.txt"

// Declared as a results output file; not referenced by the script below.
var theFullpathString: String = "/Users/username/Desktop/SampleFiles/publishedfrags.txt"

// MARK: working storage

// Shell command line used to run obspectrophore.
var obshellCommand: String = ""
var obspectrophoreResults: String = ""

// Query molecule: title, raw text fields and numeric fingerprint.
var queryMoltitle: String = ""
var mol1array = [String]()
var mol1Doubles = [Double]()

// Record currently being compared: title, raw text fields and numeric fingerprint.
var mainMoltitle: String = ""
var mainMolarray = [String]()
var molRecord = [String]()
var molDouble = [Double]()

//MARK: function for submitting obabel shell scripts

/// Runs `command` through `/bin/bash -c` and returns everything it wrote to
/// standard output.
///
/// - Parameter command: The complete shell command line to execute.
/// - Returns: The captured standard output decoded as UTF-8 (invalid byte
///   sequences are replaced rather than causing a crash), or an empty string
///   if bash could not be started.
@discardableResult
func shell(_ command: String) -> String {
    let task = Process()
    task.executableURL = URL(fileURLWithPath: "/bin/bash")
    task.arguments = ["-c", command]

    let pipe = Pipe()
    task.standardOutput = pipe

    // `run()` reports a launch failure as a Swift error instead of raising an
    // Objective-C exception the way the deprecated `launch()` does.
    do {
        try task.run()
    } catch {
        print("shell: unable to start /bin/bash: \(error)")
        return ""
    }

    // Drain the pipe before waiting so a chatty child cannot block on a full
    // pipe buffer; then reap the process.
    let data = pipe.fileHandleForReading.readDataToEndOfFile()
    task.waitUntilExit()

    return String(decoding: data, as: UTF8.self)
}
// MARK: compute the spectrophore of the query molecule

// The tool path must be absolute: without the leading "/" it is resolved
// relative to the current working directory.
obshellCommand = "/usr/local/bin/obspectrophore" + " -i " + iFilepath

let theText = shell(obshellCommand)

// Everything up to and including "Angstrom\n\n" is the obspectrophore banner.
guard let queryHeaderRange = theText.range(of: "Angstrom\n\n") else {
    fatalError("obspectrophore produced no recognisable output for \(iFilepath)")
}

// Only the first non-blank line after the banner is used as the query record.
guard let queryLine = theText[queryHeaderRange.upperBound...]
    .components(separatedBy: "\n")
    .first(where: { !$0.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty }) else {
    fatalError("obspectrophore output contains no molecule record for \(iFilepath)")
}

// A record is "<title>\t<value>\t<value>...", possibly with a trailing tab.
mol1array = queryLine.components(separatedBy: "\t")
queryMoltitle = mol1array.removeFirst() // components(separatedBy:) always yields at least one element

// Drop empty fields (e.g. the one produced by a trailing tab) instead of
// assuming exactly one junk field at the end.
mol1array = mol1array
    .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) }
    .filter { !$0.isEmpty }

mol1Doubles = mol1array.map { (token) -> Double in
    guard let value = Double(token) else {
        fatalError("Non-numeric spectrophore value '\(token)' in query record \(queryMoltitle)")
    }
    return value
}


//

// MARK: compare the query against every record in the main file

/// Returns the Euclidean distance between two vectors.
///
/// Elements are paired positionally; if the lengths differ, the extra
/// elements of the longer vector are ignored, so callers should check the
/// lengths first.
func euclideanDistance(_ lhs: [Double], _ rhs: [Double]) -> Double {
    var sumOfSquares = 0.0
    for (a, b) in zip(lhs, rhs) {
        let difference = a - b
        sumOfSquares += difference * difference
    }
    // The square root is taken once, after the whole sum has been accumulated.
    return sumOfSquares.squareRoot()
}

guard let themainText = try? String(contentsOfFile: mainFilePath, encoding: String.Encoding.utf8) else {
    fatalError("Unable to read spectrophore file at \(mainFilePath)")
}

// Everything up to and including "Angstrom\n\n" is the obspectrophore banner.
guard let mainIndex = themainText.range(of: "Angstrom\n\n") else {
    fatalError("No obspectrophore header found in \(mainFilePath)")
}

let textForarray = themainText[mainIndex.upperBound...]

let forMainArray = textForarray.components(separatedBy: "\n") as [String]

let thelength = (forMainArray.count)
print(thelength) // includes the empty entry after the file's final line feed, if any

// Results are collected in memory and written once at the end. Launching a
// shell per record to `echo` a line is far slower than the distance
// calculation itself, and the unquoted echo let the shell reinterpret the
// molecule title and turn the tab separator into a space.
var resultLines: [String] = []
resultLines.reserveCapacity(thelength)

for line in forMainArray {
    // Skip blank lines (such as the one after the final line feed) rather
    // than assuming exactly one trailing empty entry.
    if line.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { continue }

    // A record is "<title>\t<value>\t<value>...", possibly with a trailing tab.
    let fields = line.components(separatedBy: "\t")
    let title = fields[0] // components(separatedBy:) always yields at least one element
    let valueTokens = fields.dropFirst()
        .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) }
        .filter { !$0.isEmpty }
    let values = valueTokens.compactMap { Double($0) }

    // A record that fails to parse, or whose length differs from the query
    // (mol1Doubles), cannot be compared; report it and move on.
    guard values.count == valueTokens.count, values.count == mol1Doubles.count else {
        print("Skipping malformed record: \(title)")
        continue
    }

    let distance = euclideanDistance(mol1Doubles, values)
    resultLines.append(title + "\t" + String(format: "%0.2f", distance) + "\n")
}

// Append to the results file, creating it when it does not exist yet.
if !resultLines.isEmpty {
    let outputData = Data(resultLines.joined().utf8)
    if let outputHandle = FileHandle(forWritingAtPath: oFilePath) {
        outputHandle.seekToEndOfFile()
        outputHandle.write(outputData)
        outputHandle.closeFile()
    } else if !FileManager.default.createFile(atPath: oFilePath, contents: outputData) {
        fatalError("Unable to write results to \(oFilePath)")
    }
}


blog comments powered by Disqus