From d198b3723d7d6e2ff74090526454d06767a894e7 Mon Sep 17 00:00:00 2001 From: mpabi Date: Tue, 11 Jun 2024 22:32:15 +0200 Subject: [PATCH] Dodano ciecia restryktazami --- U87974.gb | 66 +++++++++++++ gfp2.py | 258 ++++++++++++++++++++++++++++++++++++++++++++++++++ pET-28a(+).gb | 193 +++++++++++++++++++++++++++++++++++++ 3 files changed, 517 insertions(+) create mode 100644 U87974.gb create mode 100644 gfp2.py create mode 100644 pET-28a(+).gb diff --git a/U87974.gb b/U87974.gb new file mode 100644 index 0000000..65dc8f7 --- /dev/null +++ b/U87974.gb @@ -0,0 +1,66 @@ +LOCUS SCU87974 818 bp mRNA linear SYN 24-FEB-1997 +DEFINITION Synthetic construct modified green fluorescent protein GFP5-ER + (mgfp5-ER) mRNA, complete cds. +ACCESSION U87974 +VERSION U87974.1 +KEYWORDS . +SOURCE synthetic construct + ORGANISM synthetic construct + other sequences; artificial sequences. +REFERENCE 1 (bases 1 to 818) + AUTHORS Siemering,K.R., Golbik,R., Sever,R. and Haseloff,J. + TITLE Mutations that suppress the thermosensitivity of green fluorescent + protein + JOURNAL Curr. Biol. 6 (12), 1653-1663 (1996) + PUBMED 8994830 +REFERENCE 2 (bases 1 to 818) + AUTHORS Haseloff,J., Siemering,K.R., Prasher,D. and Hodge,S. + TITLE Removal of a cryptic intron and subcellular localisation of green + fluorescent protein are required to mark transgenic Arabidopsis + plants brightly + JOURNAL Proc. Natl. Acad. Sci. U.S.A. (1997) In press +REFERENCE 3 (bases 1 to 818) + AUTHORS Siemering,K.R., Golbik,R., Sever,R. and Haseloff,J. 
+ TITLE Direct Submission + JOURNAL Submitted (31-JAN-1997) Division of Cell Biology, MRC Laboratory of + Molecular Biology, Hills Road, Cambridge CB2 2QH, UK +FEATURES Location/Qualifiers + source 1..818 + /organism="synthetic construct" + /mol_type="mRNA" + /db_xref="taxon:32630" + gene 1..818 + /gene="mgfp5-ER" + CDS 21..812 + /gene="mgfp5-ER" + /note="contains codon usage changes that disrupt a cryptic + plant intron, mutations that increase the thermotolerance + and change the spectral characteristics of the protein, and + sequences that code for signal peptides that result in + retention of the protein in the plant endoplasmic + reticulum" + /codon_start=1 + /transl_table=11 + /product="modified green fluorescent protein GFP5-ER" + /protein_id="AAB47999.1" + /translation="MKTNLFLFLIFSLLLSLSSAEFSKGEELFTGVVPILVELDGDVNG + HKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDF + FKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLE + YNYNSHNVYIMADKQKNGIKANFKTRHNIEDGGVQLADHYQQNTPIGDGPVLLPDNHYL + STQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYKHDEL" +ORIGIN + 1 ggatccaagg agatataaca atgaagacta atctttttct ctttctcatc ttttcacttc + 61 tcctatcatt atcctcggcc gaattcagta aaggagaaga acttttcact ggagttgtcc + 121 caattcttgt tgaattagat ggtgatgtta atgggcacaa attttctgtc agtggagagg + 181 gtgaaggtga tgcaacatac ggaaaactta cccttaaatt tatttgcact actggaaaac + 241 tacctgttcc atggccaaca cttgtcacta ctttctctta tggtgttcaa tgcttttcaa + 301 gatacccaga tcatatgaag cggcacgact tcttcaagag cgccatgcct gagggatacg + 361 tgcaggagag gaccatcttc ttcaaggacg acgggaacta caagacacgt gctgaagtca + 421 agtttgaggg agacaccctc gtcaacagga tcgagcttaa gggaatcgat ttcaaggagg + 481 acggaaacat cctcggccac aagttggaat acaactacaa ctcccacaac gtatacatca + 541 tggccgacaa gcaaaagaac ggcatcaaag ccaacttcaa gacccgccac aacatcgaag + 601 acggcggcgt gcaactcgct gatcattatc aacaaaatac tccaattggc gatggccctg + 661 tccttttacc agacaaccat tacctgtcca cacaatctgc cctttcgaaa gatcccaacg + 721 aaaagagaga ccacatggtc cttcttgagt ttgtaacagc tgctgggatt 
acacatggca + 781 tggatgaact atacaaacat gatgagcttt aagagctc +//
diff --git a/gfp2.py b/gfp2.py
new file mode 100644
index 0000000..4f522e8
--- /dev/null
+++ b/gfp2.py
@@ -0,0 +1,284 @@
+#! /usr/bin/env python3
+# vim:fenc=utf-8
+#
+# Copyright © 2024 user
+#
+# Distributed under terms of the MIT license.
+import os
+from Bio import Entrez, SeqIO
+from Bio.Seq import Seq
+from Bio.SeqFeature import SeqFeature, FeatureLocation
+
+from Bio.Restriction import RestrictionBatch, EcoRI, BamHI, HindIII, Bpu1102I, NcoI
+from Bio.Restriction import AllEnzymes, Analysis
+
+from Bio.Graphics import GenomeDiagram
+from reportlab.lib import colors
+
+#%%
+# Entrez settings
+Entrez.email = "your_email@example.com"  # Enter your e-mail address
+
+
+def fetch_and_save_sequence(db, id, file_path):
+    """Download a GenBank record from NCBI and save it to a local file.
+
+    Args:
+        db: Entrez database name passed to ``Entrez.efetch``.
+        id: Record identifier passed to ``Entrez.efetch``.
+        file_path: Path the record is written to in GenBank format.
+
+    Returns:
+        The record parsed by ``SeqIO.read``.
+    """
+    handle = Entrez.efetch(db=db, id=id, rettype="gb", retmode="text")
+    try:
+        record = SeqIO.read(handle, "genbank")
+    finally:
+        # Close the handle even when parsing fails.
+        handle.close()
+    SeqIO.write(record, file_path, "genbank")
+    return record
+
+
+def load_local_sequence(file_path):
+    """Read a single GenBank record from the local file ``file_path``."""
+    with open(file_path, 'r') as file:
+        record = SeqIO.read(file, "genbank")
+    return record
+
+
+def load_or_fetch_sequence(db, seq_id, file_path, label):
+    """Return the record stored in ``file_path``, downloading it first if absent.
+
+    ``label`` is only used in the message printed after a download.
+    """
+    if not os.path.exists(file_path):
+        print(f"Plik {file_path} nie istnieje. Pobieranie z serwera NCBI...")
+        record = fetch_and_save_sequence(db, seq_id, file_path)
+        print(f"Pobrano i zapisano sekwencję {label} do pliku {file_path}.")
+    else:
+        print(f"Plik {file_path} istnieje. Wczytywanie danych z lokalnego pliku...")
+        record = load_local_sequence(file_path)
+        print(f"Wczytano dane z pliku {file_path}.")
+    return record
+
+
+#%%
+
+# Sequence identifiers
+gfp_id = "U87974"
+# NOTE(review): "pET-28a(+)" names the local GenBank file; it does not look like
+# an NCBI accession, so the download fallback presumably fails for it - confirm.
+plasmid_id = "pET-28a(+)"
+
+# Paths of the local files
+gfp_file_path = gfp_id+'.gb'
+plasmid_file_path = plasmid_id+'.gb'
+
+#%%
+# Load the GFP sequence (downloaded when no local copy exists)
+gfp_record = load_or_fetch_sequence("nuccore", gfp_id, gfp_file_path, "GFP")
+
+gfp_seq = str(gfp_record.seq)
+print(f"GFP Sequence: {gfp_seq[:60]}...")
+
+#%%
+# Load the plasmid sequence (downloaded when no local copy exists)
+record = load_or_fetch_sequence("nuccore", plasmid_id, plasmid_file_path, "plazmidu")
+
+#%%
+# Print the annotations of the plasmid record
+for feature in record.features:
+    print(f"Type: {feature.type}")
+    print(f"Location: {feature.location}")
+    if 'gene' in feature.qualifiers:
+        print(f"Gene: {feature.qualifiers['gene']}")
+    if 'product' in feature.qualifiers:
+        print(f"Product: {feature.qualifiers['product']}")
+    print()
+
+#%%
+# Find and print the T7 terminator sequence
+for feature in record.features:
+    if feature.type == "terminator" and "T7" in feature.qualifiers.get('note', [''])[0]:
+        print("Terminator T7 znaleziony:")
+        print(f"Type: {feature.type}")
+        print(f"Location: {feature.location}")
+
+        # extract() reverse-complements features located on the minus strand
+        terminator_seq = feature.extract(record.seq)
+
+        print(f"Sequence: {terminator_seq}")
+        print()
+
+#%%
+
+# Plasmid DNA sequence
+dna_seq = record.seq
+
+# Find the NcoI and Bpu1102I cut sites
+ncoI_sites = NcoI.search(dna_seq)
+bpu1102I_sites = Bpu1102I.search(dna_seq)
+
+# Fail with a clear message instead of an IndexError when an enzyme does not cut
+if not ncoI_sites or not bpu1102I_sites:
+    raise ValueError("NcoI and Bpu1102I must both cut the plasmid sequence")
+
+# Use the first cut site of each enzyme
+ncoI_site = ncoI_sites[0]
+bpu1102I_site = bpu1102I_sites[0]
+
+# Extract 20-base fragments around the cut sites
+ncoI_fragment = dna_seq[ncoI_site-16:ncoI_site+4]  # 16 bases before and 4 after the cut
+bpu1102I_fragment = dna_seq[bpu1102I_site:bpu1102I_site+20]  # 20 bases after the cut
+
+# Sticky ends
+ncoI_sticky_end = ncoI_fragment[-4:]
+ncoI_sticky_end_comp = ncoI_sticky_end.complement()
+
+bpu1102I_sticky_end = bpu1102I_fragment[:4]
+bpu1102I_sticky_end_comp = bpu1102I_sticky_end.complement()
+
+
+def print_sticky_ends(seq1, sticky_end1, seq2, sticky_end2, start1, start2):
+    """Print two double-stranded fragments with their end sequences, staggered.
+
+    Each fragment is printed as an upper strand and its complement; the second
+    fragment's lower strand is printed reversed.
+
+    Args:
+        seq1: Body of the first fragment (must provide ``complement()``).
+        sticky_end1: Sequence printed at the end of the first fragment.
+        seq2: Body of the second fragment (must provide ``complement()``).
+        sticky_end2: Sequence printed at the start of the second fragment.
+        start1: Offset used for the index printed after the first fragment.
+        start2: Index printed before the second fragment.
+    """
+    # Convert sequences to strings
+    seq1_str = str(seq1)
+    sticky_end1_str = str(sticky_end1)
+    comp_seq1_str = str(seq1.complement())
+    sticky_end1_comp_str = str(sticky_end1.complement())
+
+    seq2_str = str(seq2)
+    sticky_end2_str = str(sticky_end2)
+    comp_seq2_str = str(seq2.complement())
+    sticky_end2_comp_str = str(sticky_end2.complement())
+
+    # Add indexes to the ends
+    seq1_with_index = f"{seq1_str[:-1]} -{start1+len(seq1_str)-1}"
+    comp_seq1_with_index = f"{comp_seq1_str[:-1]}"
+
+    seq2_with_index = f"{start2}- {seq2_str[1:]}"
+    comp_seq2_with_index = f"{comp_seq2_str[1:]}"
+
+    # Mirror the lower strand of the second fragment
+    comp_seq2_with_index = comp_seq2_with_index[::-1]
+    sticky_end2_comp_str = sticky_end2_comp_str[::-1]
+
+    # Format the output as requested
+    print(f"5'-{seq1_with_index} {sticky_end1_str}-3' (+)")
+    print(f"3'-{comp_seq1_with_index}{sticky_end1_comp_str}-5'")
+    print()
+    print(f"5'-{sticky_end2_str} {seq2_with_index}-3' (+)")
+    print(f" {sticky_end2_comp_str}{comp_seq2_with_index}-5'")
+
+
+# Show the sticky ends of the fragments
+print("Fragmenty po cięciu NcoI i Bpu1102I:")
+print_sticky_ends(ncoI_fragment[:-4], ncoI_sticky_end, bpu1102I_fragment[4:], bpu1102I_sticky_end, ncoI_site-16, bpu1102I_site)
+
+#%%
+# Sequence encoding the His6 tag
+his6_tag = "CACCACCACCACCACCAC"  # Codes for 6x histidine (His6)
+
+#%%
+# GFP sequence with the His6 tag appended at the end
+gfp_seq_with_his6 = gfp_seq + his6_tag
+
+#%%
+# Primer design
+forward_primer = gfp_seq[:20]  # First 20 nucleotides of the GFP sequence
+reverse_primer = str(Seq(gfp_seq_with_his6[-20:]).reverse_complement())  # Last 20 nucleotides of GFP + His6, reverse-complemented
+
+print(f"Forward Primer: {forward_primer}")
+print(f"Reverse Primer: {reverse_primer}")
+
+#%%
+# Restriction enzyme cut sites
+enzymes = RestrictionBatch([EcoRI, BamHI, HindIII])
+#enzymes = RestrictionBatch([BamHI, ])
+restriction_sites = enzymes.search(record.seq)
+
+#%%
+# Run the restriction analysis
+analysis = Analysis(AllEnzymes, record.seq)
+
+# Result of the analysis
+results = analysis.full()
+
+# Display the results
+#for enzyme in results:
+#    if len(results[enzyme]) > 0:
+#        print(f"{enzyme}: {results[enzyme]}")
+
+#%%
+# Build the plasmid diagram
+diagram = GenomeDiagram.Diagram(f"{plasmid_id} Plasmid Map")
+track = diagram.new_track(1, name="Annotated Features", greytrack=True)
+feature_set = track.new_set()
+
+# Add the GFP coding sequence
+feature_set.add_feature(
+    SeqFeature(FeatureLocation(0, len(gfp_seq), strand=+1)),
+    name="GFP Coding Sequence",
+    label=True,
+    color=colors.lightblue
+)
+
+# Add the His6 tag
+feature_set.add_feature(
+    SeqFeature(FeatureLocation(len(gfp_seq), len(gfp_seq_with_his6), strand=+1)),
+    name="His6 Tag",
+    label=True,
+    color=colors.lightgreen
+)
+
+# Add the forward primer
+feature_set.add_feature(
+    SeqFeature(FeatureLocation(0, len(forward_primer), strand=+1)),
+    name="Forward Primer",
+    label=True,
+    color=colors.orange
+)
+
+# Add the reverse primer
+feature_set.add_feature(
+    SeqFeature(FeatureLocation(len(gfp_seq_with_his6) - len(reverse_primer), len(gfp_seq_with_his6), strand=-1)),
+    name="Reverse Primer",
+    label=True,
+    color=colors.red
+)
+
+# Add the restriction enzyme cut sites
+for enzyme, sites in restriction_sites.items():
+    for site in sites:
+        feature_set.add_feature(
+            SeqFeature(FeatureLocation(site, site + 1, strand=0)),
+            # GenomeDiagram labels need text, not the enzyme class itself
+            name=str(enzyme),
+            label=True,
+            color=colors.purple
+        )
+
+# Draw the diagram
+diagram.draw(format="circular", circular=True, pagesize='A4', start=0, end=len(record), circle_core=0.5)
+
+# Create the output directory so that writing the PDF cannot fail on a missing folder
+output_dir = "pdf"
+output_path = os.path.join(output_dir, "plasmid_map.pdf")
+os.makedirs(output_dir, exist_ok=True)
+diagram.write(output_path, "PDF")
+
+print(f"Zapisano diagram plazmidu do pliku '{output_path}'")
diff --git a/pET-28a(+).gb b/pET-28a(+).gb
new file mode 100644
index 0000000..c39bb21
--- /dev/null
+++ b/pET-28a(+).gb
@@ -0,0 +1,193 @@
+LOCUS 40924_17796 5369 bp DNA circular SYN 14-OCT-2021 +DEFINITION synthetic circular DNA. +ACCESSION . +VERSION . +KEYWORDS . +SOURCE synthetic DNA construct + ORGANISM synthetic DNA construct +REFERENCE 1 (bases 1 to 5369) + AUTHORS caoheibi + TITLE Direct Submission +REFERENCE 2 (bases 1 to 5369) + AUTHORS . 
+ TITLE Direct Submission +COMMENT SGRef: number: 1; type: "Journal Article" +FEATURES Location/Qualifiers + source 1..5369 + /mol_type="other DNA" + /organism="synthetic DNA construct" + terminator complement(26..73) + /label=T7 terminator + /note="transcription terminator for bacteriophage T7 RNA + polymerase" + CDS complement(140..157) + /codon_start=1 + /label=6xHis + /note="6xHis affinity tag" + /translation="HHHHHH" + CDS complement(207..239) + /codon_start=1 + /label=T7 tag (gene 10 leader) + /note="leader peptide from bacteriophage T7 gene 10" + /translation="MASMTGGQQMG" + CDS complement(243..260) + /codon_start=1 + /label=thrombin site + /note="thrombin recognition and cleavage site" + /translation="LVPRGS" + CDS complement(270..287) + /codon_start=1 + /label=6xHis + /note="6xHis affinity tag" + /translation="HHHHHH" + RBS complement(306..328) + /label=RBS + /note="efficient ribosome binding site from bacteriophage + T7 gene 10 (Olins and Rangwala, 1989)" + protein_bind complement(343..367) + /label=lac operator + /note="The lac repressor binds to the lac operator to + inhibit transcription in E. coli. This inhibition can be + relieved by adding lactose or + isopropyl-beta-D-thiogalactopyranoside (IPTG)." + promoter complement(368..386) + /label=T7 promoter + /note="promoter for bacteriophage T7 RNA polymerase" + promoter 695..772 + /label=lacI promoter + CDS 773..1852 + /codon_start=1 + /label=lacI + /note="lac repressor" + /translation="VKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAEL + NYIPNRVAQQLAGKQSLLIGVATSSLALHAPSQIVAAIKSRADQLGASVVVSMVERSGV + EACKAAVHNLLAQRVSGLIINYPLDDQDAIAVEAACTNVPALFLDVSDQTPINSIIFSH + EDGTRLGVEHLVALGHQQIALLAGPLSSVSARLRLAGWHKYLTRNQIQPIAEREGDWSA + MSGFQQTMQMLNEGIVPTAMLVANDQMALGAMRAITESGLRVGADISVVGYDDTEDSSC + YIPPLTTIKQDFRLLGQTSVDRLLQLSQGQAVKGNQLLPVSLVKRKTTLAPNTQTASPR + ALADSLMQLARQVSRLESGQ" + protein_bind 1868..1889 + /label=CAP binding site + /note="CAP binding activates transcription in the presence + of cAMP." 
+ CDS 2664..2852 + /codon_start=1 + /label=rop + /note="Rop protein, which maintains plasmids at low copy + number" + /translation="VTKQEKTALNMARFIRSQTLTLLEKLNELDADEQADICESLHDHA + DELYRSCLARFGDDGENL" + misc_feature 2957..3099 + /label=bom + /note="basis of mobility region from pBR322" + rep_origin complement(3285..3873) + /direction=LEFT + /label=ori + /note="high-copy-number ColE1/pMB1/pBR322/pUC origin of + replication" + CDS 3995..4807 + /codon_start=1 + /label=KanR + /note="aminoglycoside phosphotransferase" + /translation="MSHIQRETSCSRPRLNSNMDADLYGYKWARDNVGQSGATIYRLYG + KPDAPELFLKHGKGSVANDVTDEMVRLNWLTEFMPLPTIKHFIRTPDDAWLLTTAIPGK + TAFQVLEEYPDSGENIVDALAVFLRRLHSIPVCNCPFNSDRVFRLAQAQSRMNNGLVDA + SDFDDERNGWPVEQVWKEMHKLLPFSPDSVVTHGDFSLDNLIFDEGKLIGCIDVGRVGI + ADRYQDLAILWNCLGEFSPSLQKRLFQKYGIDNPDMNKLQFHLMLDEFF" + rep_origin complement(4903..5358) + /direction=LEFT + /label=f1 ori + /note="f1 bacteriophage origin of replication; arrow + indicates direction of (+) strand synthesis" +ORIGIN + 1 atccggatat agttcctcct ttcagcaaaa aacccctcaa gacccgttta gaggccccaa + 61 ggggttatgc tagttattgc tcagcggtgg cagcagccaa ctcagcttcc tttcgggctt + 121 tgttagcagc cggatctcag tggtggtggt ggtggtgctc gagtgcggcc gcaagcttgt + 181 cgacggagct cgaattcgga tccgcgaccc atttgctgtc caccagtcat gctagccata + 241 tggctgccgc gcggcaccag gccgctgctg tgatgatgat gatgatggct gctgcccatg + 301 gtatatctcc ttcttaaagt taaacaaaat tatttctaga ggggaattgt tatccgctca + 361 caattcccct atagtgagtc gtattaattt cgcgggatcg agatctcgat cctctacgcc + 421 ggacgcatcg tggccggcat caccggcgcc acaggtgcgg ttgctggcgc ctatatcgcc + 481 gacatcaccg atggggaaga tcgggctcgc cacttcgggc tcatgagcgc ttgtttcggc + 541 gtgggtatgg tggcaggccc cgtggccggg ggactgttgg gcgccatctc cttgcatgca + 601 ccattccttg cggcggcggt gctcaacggc ctcaacctac tactgggctg cttcctaatg + 661 caggagtcgc ataagggaga gcgtcgagat cccggacacc atcgaatggc gcaaaacctt + 721 tcgcggtatg gcatgatagc gcccggaaga gagtcaattc agggtggtga atgtgaaacc + 781 agtaacgtta tacgatgtcg cagagtatgc cggtgtctct tatcagaccg 
tttcccgcgt + 841 ggtgaaccag gccagccacg tttctgcgaa aacgcgggaa aaagtggaag cggcgatggc + 901 ggagctgaat tacattccca accgcgtggc acaacaactg gcgggcaaac agtcgttgct + 961 gattggcgtt gccacctcca gtctggccct gcacgcgccg tcgcaaattg tcgcggcgat + 1021 taaatctcgc gccgatcaac tgggtgccag cgtggtggtg tcgatggtag aacgaagcgg + 1081 cgtcgaagcc tgtaaagcgg cggtgcacaa tcttctcgcg caacgcgtca gtgggctgat + 1141 cattaactat ccgctggatg accaggatgc cattgctgtg gaagctgcct gcactaatgt + 1201 tccggcgtta tttcttgatg tctctgacca gacacccatc aacagtatta ttttctccca + 1261 tgaagacggt acgcgactgg gcgtggagca tctggtcgca ttgggtcacc agcaaatcgc + 1321 gctgttagcg ggcccattaa gttctgtctc ggcgcgtctg cgtctggctg gctggcataa + 1381 atatctcact cgcaatcaaa ttcagccgat agcggaacgg gaaggcgact ggagtgccat + 1441 gtccggtttt caacaaacca tgcaaatgct gaatgagggc atcgttccca ctgcgatgct + 1501 ggttgccaac gatcagatgg cgctgggcgc aatgcgcgcc attaccgagt ccgggctgcg + 1561 cgttggtgcg gatatctcgg tagtgggata cgacgatacc gaagacagct catgttatat + 1621 cccgccgtta accaccatca aacaggattt tcgcctgctg gggcaaacca gcgtggaccg + 1681 cttgctgcaa ctctctcagg gccaggcggt gaagggcaat cagctgttgc ccgtctcact + 1741 ggtgaaaaga aaaaccaccc tggcgcccaa tacgcaaacc gcctctcccc gcgcgttggc + 1801 cgattcatta atgcagctgg cacgacaggt ttcccgactg gaaagcgggc agtgagcgca + 1861 acgcaattaa tgtaagttag ctcactcatt aggcaccggg atctcgaccg atgcccttga + 1921 gagccttcaa cccagtcagc tccttccggt gggcgcgggg catgactatc gtcgccgcac + 1981 ttatgactgt cttctttatc atgcaactcg taggacaggt gccggcagcg ctctgggtca + 2041 ttttcggcga ggaccgcttt cgctggagcg cgacgatgat cggcctgtcg cttgcggtat + 2101 tcggaatctt gcacgccctc gctcaagcct tcgtcactgg tcccgccacc aaacgtttcg + 2161 gcgagaagca ggccattatc gccggcatgg cggccccacg ggtgcgcatg atcgtgctcc + 2221 tgtcgttgag gacccggcta ggctggcggg gttgccttac tggttagcag aatgaatcac + 2281 cgatacgcga gcgaacgtga agcgactgct gctgcaaaac gtctgcgacc tgagcaacaa + 2341 catgaatggt cttcggtttc cgtgtttcgt aaagtctgga aacgcggaag tcagcgccct + 2401 gcaccattat gttccggatc tgcatcgcag gatgctgctg gctaccctgt ggaacaccta + 2461 catctgtatt 
aacgaagcgc tggcattgac cctgagtgat ttttctctgg tcccgccgca + 2521 tccataccgc cagttgttta ccctcacaac gttccagtaa ccgggcatgt tcatcatcag + 2581 taacccgtat cgtgagcatc ctctctcgtt tcatcggtat cattaccccc atgaacagaa + 2641 atccccctta cacggaggca tcagtgacca aacaggaaaa aaccgccctt aacatggccc + 2701 gctttatcag aagccagaca ttaacgcttc tggagaaact caacgagctg gacgcggatg + 2761 aacaggcaga catctgtgaa tcgcttcacg accacgctga tgagctttac cgcagctgcc + 2821 tcgcgcgttt cggtgatgac ggtgaaaacc tctgacacat gcagctcccg gagacggtca + 2881 cagcttgtct gtaagcggat gccgggagca gacaagcccg tcagggcgcg tcagcgggtg + 2941 ttggcgggtg tcggggcgca gccatgaccc agtcacgtag cgatagcgga gtgtatactg + 3001 gcttaactat gcggcatcag agcagattgt actgagagtg caccatatat gcggtgtgaa + 3061 ataccgcaca gatgcgtaag gagaaaatac cgcatcaggc gctcttccgc ttcctcgctc + 3121 actgactcgc tgcgctcggt cgttcggctg cggcgagcgg tatcagctca ctcaaaggcg + 3181 gtaatacggt tatccacaga atcaggggat aacgcaggaa agaacatgtg agcaaaaggc + 3241 cagcaaaagg ccaggaaccg taaaaaggcc gcgttgctgg cgtttttcca taggctccgc + 3301 ccccctgacg agcatcacaa aaatcgacgc tcaagtcaga ggtggcgaaa cccgacagga + 3361 ctataaagat accaggcgtt tccccctgga agctccctcg tgcgctctcc tgttccgacc + 3421 ctgccgctta ccggatacct gtccgccttt ctcccttcgg gaagcgtggc gctttctcat + 3481 agctcacgct gtaggtatct cagttcggtg taggtcgttc gctccaagct gggctgtgtg + 3541 cacgaacccc ccgttcagcc cgaccgctgc gccttatccg gtaactatcg tcttgagtcc + 3601 aacccggtaa gacacgactt atcgccactg gcagcagcca ctggtaacag gattagcaga + 3661 gcgaggtatg taggcggtgc tacagagttc ttgaagtggt ggcctaacta cggctacact + 3721 agaaggacag tatttggtat ctgcgctctg ctgaagccag ttaccttcgg aaaaagagtt + 3781 ggtagctctt gatccggcaa acaaaccacc gctggtagcg gtggtttttt tgtttgcaag + 3841 cagcagatta cgcgcagaaa aaaaggatct caagaagatc ctttgatctt ttctacgggg + 3901 tctgacgctc agtggaacga aaactcacgt taagggattt tggtcatgaa caataaaact + 3961 gtctgcttac ataaacagta atacaagggg tgttatgagc catattcaac gggaaacgtc + 4021 ttgctctagg ccgcgattaa attccaacat ggatgctgat ttatatgggt ataaatgggc + 4081 tcgcgataat gtcgggcaat caggtgcgac 
aatctatcga ttgtatggga agcccgatgc + 4141 gccagagttg tttctgaaac atggcaaagg tagcgttgcc aatgatgtta cagatgagat + 4201 ggtcagacta aactggctga cggaatttat gcctcttccg accatcaagc attttatccg + 4261 tactcctgat gatgcatggt tactcaccac tgcgatcccc gggaaaacag cattccaggt + 4321 attagaagaa tatcctgatt caggtgaaaa tattgttgat gcgctggcag tgttcctgcg + 4381 ccggttgcat tcgattcctg tttgtaattg tccttttaac agcgatcgcg tatttcgtct + 4441 cgctcaggcg caatcacgaa tgaataacgg tttggttgat gcgagtgatt ttgatgacga + 4501 gcgtaatggc tggcctgttg aacaagtctg gaaagaaatg cataaacttt tgccattctc + 4561 accggattca gtcgtcactc atggtgattt ctcacttgat aaccttattt ttgacgaggg + 4621 gaaattaata ggttgtattg atgttggacg agtcggaatc gcagaccgat accaggatct + 4681 tgccatccta tggaactgcc tcggtgagtt ttctccttca ttacagaaac ggctttttca + 4741 aaaatatggt attgataatc ctgatatgaa taaattgcag tttcatttga tgctcgatga + 4801 gtttttctaa gaattaattc atgagcggat acatatttga atgtatttag aaaaataaac + 4861 aaataggggt tccgcgcaca tttccccgaa aagtgccacc tgaaattgta aacgttaata + 4921 ttttgttaaa attcgcgtta aatttttgtt aaatcagctc attttttaac caataggccg + 4981 aaatcggcaa aatcccttat aaatcaaaag aatagaccga gatagggttg agtgttgttc + 5041 cagtttggaa caagagtcca ctattaaaga acgtggactc caacgtcaaa gggcgaaaaa + 5101 ccgtctatca gggcgatggc ccactacgtg aaccatcacc ctaatcaagt tttttggggt + 5161 cgaggtgccg taaagcacta aatcggaacc ctaaagggag cccccgattt agagcttgac + 5221 ggggaaagcc ggcgaacgtg gcgagaaagg aagggaagaa agcgaaagga gcgggcgcta + 5281 gggcgctggc aagtgtagcg gtcacgctgc gcgtaaccac cacacccgcc gcgcttaatg + 5341 cgccgctaca gggcgcgtcc cattcgcca +//