programming help
I'm looking at using Spectrophores for comparing molecules (https://openbabel.org/docs/dev/Fingerprints/spectrophore.html). There is a command-line tool for creating the fingerprints, and I've written the code below to calculate the Euclidean distance between the fingerprints.
The output from the obspectrophore tool for an individual record is shown below; "ID_000002" is the identifier of that molecule record.
*******************************************
SPECTROPHORE(TM) CALCULATOR: OBSPECTROPHORE
*******************************************
Input file: /Users/username/Desktop/SampleFiles/publishedfrags.sdf
Normalization: No
Accuracy: 20 degrees
Stereo treatment: No
Resolution: 3 Angstrom
ID_000002 1.49549 2.12041 1.75615 3.2692 4.92872 3.76183 3.61444 3.72374 3.76777 6.28071 6.29508 3.72293 2.84116 2.85055 10.5207 10.5353 15.5128 13.9755 11.3767 20.0578 16.3018 16.4986 20.8431 15.5037 28.5218 31.3143 29.1957 24.1175 20.2475 20.7571 6.87729 23.1364 21.7477 26.0813 9.92146 25.4475 0.937131 0.87351 2.91767 2.9165 0.823157 0.845468 0.39668 3.15164 1.9814 0.923294 0.769458 3.01774
The file that I want to search will have many, many records.
My script is shown below. It works fine, but I'm wondering if I can speed it up, in particular the calculation of the Euclidean distance between the fingerprints. I've heard of the Accelerate framework but can't find much in terms of examples or tutorials on how to use it.
import AppKit
import PlaygroundSupport
import Foundation
// MARK: File locations

// Spectrophore file holding the records that are searched against.
var mainFilePath: String = "/Users/username/Desktop/SampleFiles/publishedfrags.spectrophore"
// Labelled "results output file" by the author.
var theFullpathString: String = "/Users/username/Desktop/SampleFiles/publishedfrags.txt"
// Structure file containing the query molecule.
var iFilepath: String = "/Users/username/Desktop/SampleFiles/selectedfrag.sdf"
// Text file that the per-record distances are appended to.
var oFilePath: String = "/Users/username/Desktop/SampleFiles/selectedfrag.txt"

// MARK: Query molecule state

// Command line handed to the shell helper.
var obshellCommand = String()
// Title, raw text fields and numeric fingerprint of the query molecule.
var queryMoltitle = String()
var mol1array = [String]()
var mol1Doubles = [Double]()

// MARK: Scratch state for the record currently being compared

var mainMoltitle = String()
var mainMolarray = [String]()
var molRecord = [String]()
var molDouble = [Double]()
var obspectrophoreResults = String()
//MARK: function for submitting obabel shell scripts
/// Runs `command` through `/bin/bash -c` and returns what it wrote to standard output.
///
/// The call blocks until the command has closed its standard output and exited.
/// Standard error is not captured.
///
/// - Parameter command: A complete shell command line. It is interpreted by bash,
///   so the caller is responsible for quoting any arguments.
/// - Returns: The captured standard output decoded as UTF-8. Bytes that are not
///   valid UTF-8 are replaced with U+FFFD instead of crashing. An empty string is
///   returned if bash itself could not be started.
@discardableResult
func shell(_ command: String) -> String {
    let task = Process()
    task.executableURL = URL(fileURLWithPath: "/bin/bash")
    task.arguments = ["-c", command]

    let pipe = Pipe()
    task.standardOutput = pipe

    do {
        try task.run()
    } catch {
        // Report the launch failure instead of raising an uncatchable exception.
        FileHandle.standardError.write(Data("shell: could not launch /bin/bash: \(error)\n".utf8))
        return ""
    }

    // Drain the pipe before waiting so a chatty child cannot block on a full pipe buffer.
    let data = pipe.fileHandleForReading.readDataToEndOfFile()
    task.waitUntilExit()

    // Lossy decoding: never fails, unlike NSString(data:encoding:) which returns nil on bad bytes.
    return String(decoding: data, as: UTF8.self)
}
// Generate the query fingerprint by running obspectrophore on the query molecule.
// The tool path is absolute so the lookup does not depend on the current working
// directory, and the input path is single-quoted so spaces survive the shell.
obshellCommand = "/usr/local/bin/obspectrophore" + " -i '" + iFilepath.replacingOccurrences(of: "'", with: "'\\''") + "'"
let theText = shell(obshellCommand)

// remove header: the records start right after the blank line that follows "... Angstrom"
guard let index = theText.range(of: "Angstrom\n\n") else {
    fatalError("No spectrophore header found in the output of: \(obshellCommand)")
}
let forArray = theText[index.upperBound...]

// Only the first non-blank line after the header is used as the query record.
guard let queryLine = forArray.components(separatedBy: "\n")
    .first(where: { !$0.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty }) else {
    fatalError("obspectrophore returned no record for \(iFilepath)")
}

// A record is "<title>\t<value>\t<value>...", optionally ending with a trailing tab.
// Fields are split on tabs only.
mol1array = queryLine.components(separatedBy: "\t").map { $0.trimmingCharacters(in: .whitespacesAndNewlines) }
queryMoltitle = mol1array.removeFirst() // components(separatedBy:) always yields at least one element
while let last = mol1array.last, last.isEmpty {
    mol1array.removeLast() // drop the empty field(s) produced by the trailing tab
}

// Fail with a readable message instead of an anonymous force-unwrap crash.
mol1Doubles = mol1array.map { field -> Double in
    guard let value = Double(field) else {
        fatalError("Non-numeric spectrophore value \"\(field)\" in query record \(queryMoltitle)")
    }
    return value
}
//
// Load the whole spectrophore file that is searched against.
let themainText: String
do {
    themainText = try String(contentsOfFile: mainFilePath, encoding: .utf8)
} catch {
    fatalError("Could not read \(mainFilePath): \(error)")
}

// Skip the header: records start right after the blank line that follows "... Angstrom".
guard let mainIndex = themainText.range(of: "Angstrom\n\n") else {
    fatalError("No spectrophore header found in \(mainFilePath)")
}
let textForarray = themainText[mainIndex.upperBound...]

// One element per line. A file that ends with a line break yields a final empty
// element, so this count can be one higher than the number of records.
let forMainArray = textForarray.components(separatedBy: "\n")
let thelength = forMainArray.count
print(thelength) // file created by obspectrophore contains final carriage return
// Compute the Euclidean distance between the query fingerprint (mol1Doubles) and
// every record of the main file, then append "<title>\t<distance>" lines to oFilePath.
//
// Results are collected in memory and written with a single file append. This
// avoids launching one bash process per record, keeps the tab separator intact
// (an unquoted `echo` collapses it to a space) and keeps molecule titles away
// from shell interpretation.
var resultLines: [String] = []
resultLines.reserveCapacity(forMainArray.count)

// Blank lines are skipped, so an empty file or a missing trailing line break is handled.
for line in forMainArray where !line.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
    // A record is "<title>\t<value>\t<value>...", optionally ending with a trailing tab.
    molRecord = line.components(separatedBy: "\t").map { $0.trimmingCharacters(in: .whitespacesAndNewlines) }
    mainMoltitle = molRecord.removeFirst() // remove moltitle
    while let last = molRecord.last, last.isEmpty {
        molRecord.removeLast() // remove the empty field(s) left by the trailing tab
    }

    molDouble = molRecord.map { field -> Double in
        guard let value = Double(field) else {
            fatalError("Non-numeric spectrophore value \"\(field)\" in record \(mainMoltitle)")
        }
        return value
    }

    // A record shorter than the query cannot be compared; extra values are ignored.
    guard molDouble.count >= mol1Doubles.count else {
        fatalError("Record \(mainMoltitle) has \(molDouble.count) values, expected at least \(mol1Doubles.count)")
    }

    // calc distance: sum of squared differences, square root taken once at the end
    var sum = 0.0
    for (queryValue, recordValue) in zip(mol1Doubles, molDouble) {
        let difference = queryValue - recordValue
        sum += difference * difference
    }
    let result = sum.squareRoot()

    resultLines.append(mainMoltitle + "\t" + String(format: "%0.2f", arguments: [result]))
}

// Append to the results file, creating it when it does not exist yet.
if !resultLines.isEmpty {
    let payload = Data((resultLines.joined(separator: "\n") + "\n").utf8)
    if let handle = FileHandle(forWritingAtPath: oFilePath) {
        defer { handle.closeFile() }
        handle.seekToEndOfFile()
        handle.write(payload)
    } else if !FileManager.default.createFile(atPath: oFilePath, contents: payload, attributes: nil) {
        fatalError("Could not write results to \(oFilePath)")
    }
}