# Author: Stanislav Geidl
# C2184 Introduction to Programming in Python
# Exercise 06

import doctest

# Characters accepted as DNA bases (both cases are allowed on input).
_DNA_BASES = frozenset("ACGTacgt")

# Maps every upper-case base to its complementary base.
_COMPLEMENT = str.maketrans("ACGT", "TGCA")

# Codon table, credits for Jaroslav Velcovsky.
# Retrieved from
# http://www.petercollingridge.co.uk/python-bioinformatics-tools/codon-table
# The 64 codons are enumerated in T, C, A, G order so that they line up with
# the amino-acid string below ('-' marks a stop codon).
_BASE_ORDER = "TCAG"
_AMINO_ACIDS = (
    "FFLLSSSSYY--CC-WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG"
)
_CODON_TABLE = dict(
    zip(
        (a + b + c for a in _BASE_ORDER for b in _BASE_ORDER for c in _BASE_ORDER),
        _AMINO_ACIDS,
    )
)


def is_dna_sequence(sequence):
    """Return True if ``sequence`` consists only of the bases a, c, g, t.

    The check is case-insensitive. An empty string contains no invalid
    character and is therefore accepted.

    >>> is_dna_sequence('agctagtacgtacgtacgata')
    True
    >>> is_dna_sequence('GACTTACGATCGACTGATCGA')
    True
    >>> is_dna_sequence('GACTTACGATCagtctatcGA')
    True
    >>> is_dna_sequence('agctagtacgtfcgtacgata')
    False
    >>> is_dna_sequence('agctagtacgtfcgtacg ta')
    False
    >>> is_dna_sequence('>agctagtacgtfcgtacgaa')
    False
    """
    return set(sequence) <= _DNA_BASES


def reverse_complement_sequence(sequence):
    """Return the reverse complement of ``sequence`` in upper case.

    The complement replaces a with T, t with A, c with G and g with C; the
    complemented sequence is then read in the opposite direction.
    Characters that are not DNA bases are only upper-cased, not replaced.

    >>> reverse_complement_sequence('agtagtagt')
    'ACTACTACT'
    >>> reverse_complement_sequence('GACGCAGTGGATCCGTACAATAG')
    'CTATTGTACGGATCCACTGCGTC'
    """
    return sequence.upper().translate(_COMPLEMENT)[::-1]


def reading_frames(sequence):
    """Return the six reading frames used to translate ``sequence``.

    The first three frames come from the original sequence, the last three
    from its reverse complement. The first and fourth frame are returned in
    full length, the second and fifth are shifted right by one base (the
    sequence without its first element) and the third and sixth by two
    bases (the sequence without its first two elements). All frames are
    returned in upper case.

    >>> reading_frames('agtagtagt')
    ['AGTAGTAGT', 'GTAGTAGT', 'TAGTAGT', 'ACTACTACT', 'CTACTACT', 'TACTACT']
    >>> reading_frames('GACGCAGTGGATCCGTACAATAG')  # doctest: +NORMALIZE_WHITESPACE
    ['GACGCAGTGGATCCGTACAATAG', 'ACGCAGTGGATCCGTACAATAG',
     'CGCAGTGGATCCGTACAATAG', 'CTATTGTACGGATCCACTGCGTC',
     'TATTGTACGGATCCACTGCGTC', 'ATTGTACGGATCCACTGCGTC']
    """
    forward = sequence.upper()
    backward = reverse_complement_sequence(sequence)
    return [strand[shift:] for strand in (forward, backward) for shift in range(3)]


def translate_codon(codon):
    """Return the one-letter amino acid encoded by ``codon``.

    ``codon`` is looked up as given, so it must be an upper-case triplet of
    T, C, A, G. Stop codons yield '-' and anything not in the codon table
    yields '?'.

    >>> translate_codon('ATG')
    'M'
    >>> translate_codon('TAG')
    '-'
    >>> translate_codon('atg')
    '?'
    """
    return _CODON_TABLE.get(codon, "?")


def translate(sequence):
    """Translate ``sequence`` into protein sequences for all six frames.

    Returns a list of six strings, one per reading frame in the order given
    by ``reading_frames``, or None when ``sequence`` is not a DNA sequence.
    Trailing bases that do not form a complete codon are ignored.

    >>> translate('agtagtagt')
    ['SSS', 'VV', '--', 'TTT', 'LL', 'YY']
    >>> translate('GACGCAGTGGATCCGTACAATAG')
    ['DAVDPYN', 'TQWIRTI', 'RSGSVQ-', 'LLYGSTA', 'YCTDPLR', 'IVRIHCV']
    >>> translate('agctfg') is None
    True
    """
    if not is_dna_sequence(sequence):
        return None

    # reading_frames already returns upper-case frames, so codons can be
    # looked up directly. The range stops early enough that only complete
    # triplets are translated.
    return [
        "".join(
            translate_codon(frame[start:start + 3])
            for start in range(0, len(frame) - 2, 3)
        )
        for frame in reading_frames(sequence)
    ]


if __name__ == "__main__":
    # Run the doctests only when executed as a script; an unguarded
    # testmod() on import would test the importing program's __main__.
    doctest.testmod()