#
# Homework 05
#
# 8 points
#

# The four nucleotide letters accepted in a DNA sequence (upper-case form).
_DNA_BASES = frozenset("ACGT")

# Watson-Crick pairing used to build the complementary strand.
_COMPLEMENT = {"A": "T", "T": "A", "C": "G", "G": "C"}


def _build_codon_table():
    """Return a dict mapping every upper-case DNA codon to its amino acid.

    The codons are generated in T, C, A, G order for each of the three
    positions, which is the order the amino-acid string below is written in.
    Stop codons are represented by '-'.
    """
    # credits for Jaroslav Velcovsky
    # Retrieved from http://www.petercollingridge.co.uk/python-bioinformatics-tools/codon-table
    bases = ['T', 'C', 'A', 'G']
    codons = [a + b + c for a in bases for b in bases for c in bases]
    amino_acids = 'FFLLSSSSYY--CC-WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG'
    return dict(zip(codons, amino_acids))


# Built once at import time instead of on every translate_codon() call.
_CODON_TABLE = _build_codon_table()


def is_dna_sequence(sequence):
    """
    Return True if *sequence* is a DNA sequence, i.e. a string that contains
    only the letters a, c, g, t in either case.

    An empty string contains no invalid character and is therefore accepted.

    >>> is_dna_sequence('agctagtacgtacgtacgata')
    True
    >>> is_dna_sequence('GACTTACGATCGACTGATCGA')
    True
    >>> is_dna_sequence('GACTTACGATCagtctatcGA')
    True
    >>> is_dna_sequence('agctagtacgtfcgtacgata')
    False
    >>> is_dna_sequence('agctagtacgtfcgtacg ta')
    False
    >>> is_dna_sequence('>agctagtacgtfcgtacgaa')
    False
    """
    # Subset test: every distinct character must be one of A, C, G, T.
    return set(sequence.upper()) <= _DNA_BASES


def reverse_complement_sequence(sequence):
    """
    Return the reverse complement of *sequence* in upper case.

    The complement replaces a with T, t with A, c with G and g with C; the
    result is then read in the opposite direction. Characters that are not
    DNA bases are passed through (upper-cased) unchanged; callers that need
    validation should check is_dna_sequence() first.

    >>> reverse_complement_sequence('agtagtagt')
    'ACTACTACT'
    >>> reverse_complement_sequence('GACGCAGTGGATCCGTACAATAG')
    'CTATTGTACGGATCCACTGCGTC'
    """
    return "".join(
        _COMPLEMENT.get(base, base) for base in reversed(sequence.upper())
    )


def reading_frames(sequence):
    """
    Return the 6 reading frames used to translate *sequence* into protein.

    The first three frames come from the original sequence, the last three
    from its reverse complement. The first and fourth frame are the full
    sequence, the second and fifth are shifted one base to the right (the
    sequence without its first element), and the third and sixth are shifted
    two bases to the right (the sequence without its first two elements).
    All frames are returned in upper case.

    >>> reading_frames('agtagtagt')
    ['AGTAGTAGT', 'GTAGTAGT', 'TAGTAGT', 'ACTACTACT', 'CTACTACT', 'TACTACT']
    >>> reading_frames('GACGCAGTGGATCCGTACAATAG')
    ['GACGCAGTGGATCCGTACAATAG', 'ACGCAGTGGATCCGTACAATAG', 'CGCAGTGGATCCGTACAATAG', 'CTATTGTACGGATCCACTGCGTC', 'TATTGTACGGATCCACTGCGTC', 'ATTGTACGGATCCACTGCGTC']
    """
    forward = sequence.upper()
    backward = reverse_complement_sequence(sequence)
    # Strand order matters: three forward offsets first, then three reverse.
    return [strand[offset:] for strand in (forward, backward) for offset in range(3)]


def translate_codon(codon):
    """
    Return the one-letter amino acid encoded by *codon*.

    *codon* must be an upper-case three-letter DNA codon. Stop codons yield
    '-'; anything that is not in the codon table yields '?'.

    >>> translate_codon('ATG')
    'M'
    >>> translate_codon('TAG')
    '-'
    >>> translate_codon('NNN')
    '?'
    """
    # dict.get replaces dict.has_key(), which no longer exists in Python 3.
    return _CODON_TABLE.get(codon, "?")


def translate(sequence):
    """
    Translate *sequence* into protein in all six reading frames.

    Returns a list of six protein strings, in the order produced by
    reading_frames(), or None when *sequence* is not a DNA sequence.
    Trailing bases that do not fill a whole codon are ignored.

    >>> translate('agtagtagt')
    ['SSS', 'VV', '--', 'TTT', 'LL', 'YY']
    >>> translate('GACGCAGTGGATCCGTACAATAG')
    ['DAVDPYN', 'TQWIRTI', 'RSGSVQ-', 'LLYGSTA', 'YCTDPLR', 'IVRIHCV']
    """
    if not is_dna_sequence(sequence):
        return None

    protein_sequences = []
    for frame in reading_frames(sequence):
        # Only whole codons are translated; drop the 0-2 leftover bases.
        usable_length = len(frame) - len(frame) % 3
        # Frames are already upper case, as translate_codon() requires.
        protein_sequences.append("".join(
            translate_codon(frame[start:start + 3])
            for start in range(0, usable_length, 3)
        ))
    return protein_sequences


### END
# Do not modify the code below.
# Grading harness: runs translate() on two fixed sequences, compares each
# result with the expected list of six protein strings, and prints a summary.

# Counters for passed and failed checks.
correct = 0
wrong = 0

print("Testing functions:")
print("-"*40)

# Test 1: short lower-case sequence.
seq1 = 'agtagtagt'
result1 = translate(seq1)
if result1 == ['SSS', 'VV', '--', 'TTT', 'LL', 'YY']:
    print("{:37} {}".format("Function #1 (translate) test 1:", "ok *"))
    correct += 1
else:
    print("{:37} {}".format("Function #1 (translate) test 1:", "ko -"))
    wrong += 1

# Test 2: longer upper-case sequence whose length is not a multiple of three.
seq2 = 'GACGCAGTGGATCCGTACAATAG'
result2 = translate(seq2)
if result2 == ['DAVDPYN', 'TQWIRTI', 'RSGSVQ-', 'LLYGSTA', 'YCTDPLR', 'IVRIHCV']:
    print("{:37} {}".format("Function #1 (translate) test 2:", "ok *"))
    correct += 1
else:
    print("{:37} {}".format("Function #1 (translate) test 2:", "ko -"))
    wrong += 1

# Echo the inputs and the actual results so a mismatch can be inspected.
print("-"*40)
print(seq1)
print(result1)
print(seq2)
print(result2)
print("-"*40)

# Final tally of passed / failed checks.
print("Spravne/correct: {}".format(correct))
print("Spatne/wrong: {}".format(wrong))

# The docstring examples can also be run with doctest; left disabled here.
#import doctest
#doctest.testmod()