############################################################################################## library(biogas) molMass("C6H12O6") molMass(c("C6H12O6", "CH3COOH")) molMass("H3C(CH2)5COOH") molMass('FeSO4(H2O)7') # hydrates molMass("(C6H12O6)0.24999 (H3COOH)0.75001") library(CHNOSZ) mass("H2O") mass("-1") # electron mass("H") me = makeup("C6H12O6", multiplier = 1, sum = TRUE, count.zero = TRUE); me ce = count.elements("C6H12O6"); ce i2A("C6H12O6") as.chemical.formula(ce, drop.zero = TRUE) as.chemical.formula(me, drop.zero = TRUE) thermo = thermo() thermo$element[,c(1,4)] thermo$protein pf <- protein.formula(thermo$protein); pf iA <- info("alanine") info(iA)$formula as.chemical.formula(makeup(iA)) library(BRAIN) # atomic composition from amino-acid sequence seq1 <- "AACD" aC1 <- getAtomsFromSeq(seq = seq1) aC1 seq2 <- AAString("ACCD") aC2 <- getAtomsFromSeq(seq = seq2) aC2 library(marelac) atomicweight atomicweight$C atomicweight$H library(ecipex) nistiso nistiso$mass nistiso$abundance nistiso$element library(IsoSpecR) data(isotopicData) isotopicData ######### Average mass / Molecular Weight ######################################################################## library(marelac) library(OrgMassSpecR) amu = list(atomicweight$C,atomicweight$H,atomicweight$O) MW1 <- MolecularWeight(formula = ListFormula("C60H30O30"), amu = amu) MW1 MW1 <- MolecularWeight(formula = ListFormula("C60H30O30")) MW1 library(marelac) MW2 <- molweight("C60H30O30") MW2 # molweight("C20H30O2") # molweight("CH3CH2CHCHCH2CHCHCH2CHCHCH2CHCHCH2CHCH(CH2)3COOH") # molweight(c("SiOH4", "NaHCO3", "C6H12O6", "Ca(HCO3)2", "Pb(NO3)2", "(NH4)2SO4")) library(BRAIN) # Bioc MW3 <- calculateAverageMass(ListFormula("C60H30O30")) MW3 MW3 <- calculateAverageMass(list(C=60, H=30, N=0, O=30, S=0)) MW3 ######### Monoisotopic mass ######################################################################## library(OrgMassSpecR) MM1 <- MonoisotopicMass(formula = ListFormula("C60H30O30"), charge = 1) MM1 MM1n <- MonoisotopicMass(formula = ListFormula("C60H30O30"), charge = 0) MM1n MM1 <- MonoisotopicMass(formula = ListFormula("C60H30O30"), charge = -1) MM1 ## with all carbon-13 atoms MonoisotopicMass(formula = list(C=4, H=7, N=3, O=1),isotopes = list(C = 13.0033548378),charge = 1) ## with 2 carbon-12 atoms and 2 carbon-13 atoms MonoisotopicMass(formula = list(C=2, H=7, N=3, O=1, x=2),isotopes = list(x = 13.0033548378),charge = 1) ## monoisotopic mass of cyanocobalamin (C63H88CoN14O14P) MonoisotopicMass(formula = list(C=63, H=88, N=14, O=14, P=1, x=1),isotopes = list(x = 58.9332002)) ######### Nominal mass ######################################################################## # C60H30O30 FORM = list(C=60, H=30, N=0, O=30, S=0) AMU = list(C=12, H=1, N=0, O=16, S=0) NMW <- sum(unlist(AMU)*unlist(FORM)) library(CHNOSZ) form = "C60H30O30" MW = mass(form); MW nommass = as.vector(unlist(lapply(names(count.elements(form)),function(x){round(mass(x),0)}))) NMW = sum(as.vector(count.elements(form))*nommass); NMW # Nominal mass NominalMass = function(formula){ nommass = as.vector(unlist(lapply(names(count.elements(formula)),function(x){round(mass(x),0)}))) return(sum(as.vector(count.elements(formula))*nommass))} NominalMass(form) ######### Isotopic Distribution ######################################################################## library(OrgMassSpecR) x <- IsotopicDistribution(formula = ListFormula("C60H30O30")) plot(x$mz, x$percent,type = "h",xlab = "m/z",ylab = "intensity (%)",main = "Isotopic Distribution, C60H30O30",lwd=2) abline(h=0,lty=2) library(ecipex) xx <- ecipex("C60H30O30", isoinfo = nistiso, limit = 0.0005,id = FALSE, sortby = "mass") lmass = xx$C60H30O30$mass labun = xx$C60H30O30$abundance plot(lmass,labun,type="h",xlab="m/z",ylab="abundance") abline(h=0) library(rcdk) formula1 <- get.formula("C60H30O30", charge = 1) isot1 <- get.isotopes.pattern(formula1,minAbund=0.00005) plot(isot1[,1], isot1[,2],type = "h",xlab = "m/z",ylab = "intensity (%)",main = "Isotopic Distribution, C60H30O30",lwd=2) abline(h=0,lty=2) library(ecipex) formC = function(xx) paste("C",as.character(xx),sep="") lC2 = sapply(c(18:120),formC) x2 <- ecipex(lC2, isoinfo = nistiso, limit = 0.0005,id = FALSE, sortby = "mass") # sortby = c("mass","abundance") lCmass2 = unlist(sapply(x2,function(xx) xx$mass)) lCabun2 = unlist(sapply(x2,function(xx) xx$abundance)) plot(lCmass2,lCabun2,type="h",xlab="m/z",ylab="abundance",xlim = c(220,1450)) abline(h=0) plot(lCmass2,lCabun2,type="h",xlab="m/z",ylab="abundance",xlim = c(400,600)) abline(h=0) library(Rdisop) Mm <- getMolecule("C60", z=0, elements=initializeElements(c("C","H","O"))) getMass(Mm) getFormula(Mm) getIsotope(Mm) getIsotope(Mm, index=1) getValid(Mm) isotopes <- getIsotope(Mm, seq(1,10)) isotopes plot(isotopes[1,], isotopes[2,],type = "h",xlab = "m/z",ylab = "abundance",main = "Isotopic Distribution, C60H30O30",lwd=2) abline(h=0,lty=2) plot(isotopes[1,], 100*isotopes[2,]/max(isotopes[2,]),type = "h",xlab = "m/z",ylab = "intensity (%)",main = "Isotopic Distribution, C60H30O30",lwd=2) abline(h=0,lty=2) ## ostatni library(BRAIN) aC = list(C=60, H=30, N=0, O=30, S=0) nrPeaks = calculateNrPeaks(aC) nrPeaks res <- calculateIsotopicProbabilities(aC = aC, stopOption="nrPeaks", nrPeaks = nrPeaks) res library(IsoSpecR) IsoSpecify(molecule = c(C=10,H=22,O=1), stopCondition = .9999 ) ######### Generation formula from mass ######################################################################## library(Rdisop) # Glutamic acid C5H9NO4, M = 147.12 glut = decomposeIsotopes(c(147.0529,148.0563), c(100.0,5.561173)) glut glut$formula glut$valid library(Rdisop) library(OrgMassSpecR) masses = c(719.924,720.936,721.910) intensities = c(1.00,0.59,0.15) mass = masses[1] dmp = decomposeMass(mass=mass, ppm=1, mzabs=0.1, elements=initializeElements(c("C","H","O")), filter=NULL, z=1, maxisotopes = length(masses), minElements="C30", maxElements="C80") dmp$formula plot(0,0,xlim=c(0,.35),ylim=c(0,.65),col=0,xlab="dM",ylab="dInt") abline(v=0,h=0,lty=2) for(i in 1:length(dmp$formula)){ fxp <- dmp$isotopes[[i]][2,]/max(dmp$isotopes[[i]][2,]) dmass <- sum(abs(as.vector(dmp$isotopes[[i]][1,])-as.vector(masses))) dint <- sum(abs(as.vector(fxp) - as.vector(intensities))) points(dmass,dint,col=0) text(dmass,dint,labels=i,cex=.7) } plot(intensities[2],intensities[3],xlim=c(-.7,.7),ylim=c(-.2,.15),pch=16,xlab="M+1",ylab="M+2") for(i in 1:length(dmp$formula)){ fxp <- dmp$isotopes[[i]][2,]/max(dmp$isotopes[[i]][2,]) dm2 <- fxp[2]-intensities[2] dm3 <- fxp[3]-intensities[3] points(dm2,dm3,col=0) text(dm2,dm3,labels=i,cex=.7) } NN = 7 dmp$formula[NN] # NN = 1, 11, 25, 47, 54, 61, 69, 80, 7, 8, 3, 5 plot(masses, intensities,type = "h",xlab = "m/z",ylab = "intensity (%)",main = "",lwd=2,xlim=c(min(min(masses),min(dmp$isotopes[[NN]][1,])),max(max(masses),max(dmp$isotopes[[NN]][1,])))) points(dmp$isotopes[[NN]][1,],(dmp$isotopes[[NN]][2,]/max(dmp$isotopes[[NN]][2,])),type="h",col=2,lwd=2) abline(h=0,lty=2) library(Rdisop) dip = decomposeIsotopes(masses=c(719.924,720.936,721.910), intensities=c(1.00,0.59,0.15), ppm=1.0, mzabs=0.005, elements=initializeElements(c("C","H","O")), filter=NULL, z=0, maxisotopes = 10, minElements="C30", maxElements="C80") dip$formula NN = 1 dmp$formula[NN] plot(masses, intensities,type = "h",xlab = "m/z",ylab = "intensity (%)",main = "",lwd=2,xlim=c(min(min(masses),min(dmp$isotopes[[NN]][1,])),max(max(masses),max(dmp$isotopes[[NN]][1,])))) points(dmp$isotopes[[NN]][1,],(dmp$isotopes[[NN]][2,]/max(dmp$isotopes[[NN]][2,])),type="h",col=2,lwd=2) abline(h=0,lty=2) dip2 = decomposeIsotopes(masses=c(719.924,720.936,721.910), intensities=c(1.00,0.59,0.15), ppm=1.0, mzabs=0.005, elements=initializeElements(c("C","H","O")), filter=NULL, z=0, maxisotopes = 10, minElements="H0", maxElements="H10") dip2$formula library(InterpretMSSpectrum) masses2 <- c(masses, rep(0,(length(dmp$isotopes[[NN]][1,]) - length(masses)))) intensities2 <- c(intensities, rep(0,(length(dmp$isotopes[[NN]][2,]) - length(intensities)))) # mass defect weighted score for an mz/int values measure for an isotopic cluster in comparison to the theoretically expected pattern. mScore(obs = cbind(masses2, intensities2), the = cbind(dmp$isotopes[[NN]][1,],dmp$isotopes[[NN]][2,])) # extremne pomale x umi i anorganiku library(rcdk) Mass=480 listform = generate.formula(Mass, window=40, validation=FALSE, charge=1, elements=list(c("Se",0,10),c("Ga",0,50))) # c("Se",0,50),c("Ga",0,50),c("O",0,10),c("H",0,5) listform mfSet <- generate.formula(719.924, window=0.1,elements=list(c("C",30,80),c("H",0,0),c("O",0,70)),validation=TRUE) mfSet[[6]] mfSet <- generate.formula(719.924, window=0.1,elements=list(c("C",30,80),c("H",0,0),c("O",0,70)),validation=FALSE) mfSet[[6]] isvalid.formula(mfSet[[6]], rule = "RDBE") # rule = c("nitrogen","RDBE") for (i in 1:length(mfSet)){print(mfSet[i])} ############################################################################################################################################################################ ##################################################################################### library(MALDIquant) library(MALDIrppa) library(OrgMassSpecR) library(PROcess) #### DATA INPUT #### path = "c:\\Users\\lubop\\Documents\\OndraJasek\\" file.names <- dir(path, pattern =".txt");file.names ss <- function(Xx){substr(Xx, start=1, stop=(nchar(Xx)-8))} snam <- ss(file.names) snam sss = function(x){strsplit(x,"-")} snams <- sapply(snam,sss) snams = as.data.frame(snams) snams ss4 <- function(Xx){substr(Xx, start=2, stop=4)} snam4 <- ss4(file.names) snam4 RDMq <- function(X){ MSVxxx <- readLines(paste("c:\\Users\\lubop\\Documents\\OndraJasek\\s",X,"-low300-pos-1700001.txt",sep="")) MSVxxh <- MSVxxx[c(3,4)] MSVxxx <- MSVxxx[-c(1:7)] MSVxxl <- lapply(MSVxxx,function(x){strsplit(x,"\t")}) # strsplit(x," ") MSVxxm <- matrix(unlist(MSVxxl), ncol = 2, byrow = TRUE) MSS1 <- as.vector(as.numeric(unlist(MSVxxm[,1]))) MSS2 <- as.vector(as.numeric(unlist(MSVxxm[,2]))) # sn <- strsplit(ss(X), "-") # sp <- paste(sn[[1]][2:(length(sn[[1]])-1)], collapse = "-") snm <- list(MSVxxh[2]) #list( ss(X),X,MSVxxh) s <- createMassSpectrum(mass=MSS1, intensity=MSS2, metaData=snm) # metaData: sample name=snm[[1]][1],sample number=snm[[2]][1],sample type=snm[[2]][2],time=sn[[2]][3],passage=sn[[2]][4],dish=sn[[2]][5],spot=sn[[2]][6] return(s) } RMSq <- lapply(snam4,RDMq) names(RMSq) = snam4 summary(RMSq) str(RMSq) as.data.frame(snam) as.data.frame(snam4) isMassSpectrumList(RMSq) isMassSpectrum(RMSq[[8]]) RMSq[[1]]@mass RMSq[[1]]@intensity RMSq[[1]]@metaData RMSq[["356"]] # any spectrum is empty any(sapply(RMSq, isEmpty)) findEmptyMassObjects(RMSq) # remove empty spectra removeEmptyMassObjects(RMSq) # all spectra contain the same number of data points table(sapply(RMSq, length)) np=sapply(RMSq,length) npc = sapply(unique(np),function(xx) return(snam[which(np==xx)])) names(npc)=unique(np); npc # the same mass difference between each data point all(sapply(RMSq, isRegular)) # identification of possibly faulty,low-quality raw mass spectra. screenSpectra(RMSq) screenSpectra(RMSq)$fspectra summary(screenSpectra(RMSq)) # number of samples length(RMSq) # summary summarySpectra(RMSq, digits = 4) #### resampling newMZ <- c(seq(from=90,to=1829,by=0.001)) length(newMZ) resampMSs <- function(X){ yi <- spline(x=X@mass,y=X@intensity,n=10000,xout=newMZ,method="fmm",ties=mean) # method = c("fmm", "periodic", "natural", "monoH.FC", "hyman") rsmp <- createMassSpectrum(mass=yi$x, intensity=yi$y, metaData=X@metaData) return(rsmp) } RMSq <- lapply(RMSq,function(X){resampMSs(X)}) #trimming RMSq <- trim(RMSq,c(90,1829)) RMSqt <- trim(RMSq,c(100,2000)) ## remove all mass lower 3000 trim(s, range=c(1829, Inf)) ## remove all mass higher 8000 trim(s, range=c(0, 90)) ## transformation RMSqtr <- transformIntensity(RMSq,method="sqrt") # method=c("sqrt", "log", "log2", "log10") # normalizace mezi 0 a 1 library(clusterSim) scalemm <- function(x) data.Normalization (x,type="n4") # define scaling function,library(clusterSim) RMSq <- transfIntensity(RMSq, fun = scalemm) scale.max <- function(x){x/max(x)} # define scaling function RMSqt1 <- transfIntensity(RMSq, fun = scale.max) library(jjb) imin = as.vector(sapply(snam4, function(mm) min(RMSq[[mm]]@intensity))) imax = as.vector(sapply(snam4, function(mm) max(RMSq[[mm]]@intensity))) names(imin) = names(imax) = snam4 scale.max2 <- function(x){x/max(x)} # define scaling function RMSqt2 <- transfIntensity(RMSq, fun = scale.max) ##### SPECTRA TREATMENT ########################################################################################################################################### as.data.frame(snam4) nm = "357" nm = as.character(nm) RMSq[[nm]]@metaData plot(RMSq[[nm]]@mass,RMSq[[nm]]@intensity,type="l",xlab="m/z",ylab="intensity (mV)",main=nm) # estimated noise estimateNoise(RMSq[[nm]],method="MAD") unique(estimateNoise(RMSq[[nm]],method="MAD")[,2]) # method=c("MAD", "SuperSmoother") ## calibrateIntensity(RMSq, method= "TIC", range = c()) # scaling factor is calculated on the mass range from range[1L] to range[2L] and applied to the whole spectrum RMSqc <- calibrateIntensity(RMSq, method= "TIC") # method=c("TIC", "PQN", "median") ## decreasing their m/z resolution RMSqr <- redResolution(RMSq, by = 2) ######################################################################################################################################## ##SPEKTRA ALIGNING - SROVNANI KALIBRACE RMSqa <- alignSpectra(RMSq, halfWindowSize=20, noiseMethod="MAD", SNR=3, tolerance=0.002, warpingMethod="lowess") #method=c("lowess", "linear", "quadratic", "cubic") ## add metadata addMetadata(RMSq, metadata, pos) # metadata Vector containing the metadata to be included for each element of x (same length as x). # pos Position of the new metadata within the metaData slot list of each element of x. #### peaks #################################################################################################################################### ## estimated noise estimateNoise(RMSq[[1]],method="MAD") noise = unique(estimateNoise(RMSq[[1]],method="MAD")[,2]) # method=c("MAD", "SuperSmoother") # peak > noise*SNR SNR=500 (treshold = noise*SNR) peaks <- lapply(RMSq,function(X){detectPeaks(X,method="MAD",SNR=SNR)}) # detectPeaks(object,halfWindowSize=20, method=c("MAD", "SuperSmoother"), SNR=2,...) #method=c("MAD", "SuperSmoother") summary(peaks) peaks <- binPeaks(peaks,method="relaxed") summary(peaks) plot(RMSq[[nm]]) points(peaks[[nm]], col = "red", pch = 4, lwd = 2) plot(RMSq[[nm]],xlim=c(250,400)) points(peaks[[nm]], col = "red", pch = 4, lwd = 2) ##TOLERANCE PIKU VIC JAK VE TRECH (0.08) 1/length(peaks) peaksz <- filterPeaks(peaks, minFrequency=0.1,mergeWhitelists=TRUE) points(peaksz[[1]], col = "green", pch = 4, lwd = 2) summary(peaksz) IntMr <- intensityMatrix(peaksz, RMSq) IntMr <- round(as.data.frame(IntMr),2) colnames(IntMr) <- unlist(lapply(colnames(IntMr),function(x){return(as.character(round(as.numeric(x),2)))})) rownames(IntMr) <- snam ######################################################################################################################################################################