#! /usr/bin/env python3 # vim:fenc=utf-8 # # Copyright © 2024 user # # Distributed under terms of the MIT license. import os from Bio import Entrez, SeqIO from Bio.Seq import Seq from Bio.SeqFeature import SeqFeature, FeatureLocation from Bio.Restriction import RestrictionBatch, EcoRI, BamHI, HindIII, Bpu1102I from Bio.Restriction import AllEnzymes, Analysis from Bio.Graphics import GenomeDiagram from reportlab.lib import colors #%% # Ustawienia Entrez Entrez.email = "your_email@example.com" # Wpisz swój adres email # Funkcja do pobierania sekwencji z NCBI i zapisywania jej lokalnie def fetch_and_save_sequence(db, id, file_path): handle = Entrez.efetch(db=db, id=id, rettype="gb", retmode="text") record = SeqIO.read(handle, "genbank") handle.close() SeqIO.write(record, file_path, "genbank") return record # Funkcja do wczytywania sekwencji z lokalnego pliku def load_local_sequence(file_path): with open(file_path, 'r') as file: record = SeqIO.read(file, "genbank") return record #%% # ID dla sekwencji w NCBI gfp_id = "U87974" plasmid_id = "pET-28a(+)" # Ścieżki do lokalnych plików gfp_file_path = gfp_id+'.gb' plasmid_file_path = plasmid_id+'.gb' #%% # Sprawdzanie i pobieranie sekwencji GFP if not os.path.exists(gfp_file_path): print(f"Plik {gfp_file_path} nie istnieje. Pobieranie z serwera NCBI...") gfp_record = fetch_and_save_sequence("nuccore", gfp_id, gfp_file_path) print(f"Pobrano i zapisano sekwencję GFP do pliku {gfp_file_path}.") else: print(f"Plik {gfp_file_path} istnieje. Wczytywanie danych z lokalnego pliku...") gfp_record = load_local_sequence(gfp_file_path) print(f"Wczytano dane z pliku {gfp_file_path}.") gfp_seq = str(gfp_record.seq) print(f"GFP Sequence: {gfp_seq[:60]}...") #%% # Sprawdzanie i pobieranie sekwencji plazmidu if not os.path.exists(plasmid_file_path): print(f"Plik {plasmid_file_path} nie istnieje. Pobieranie z serwera NCBI...") record = fetch_and_save_sequence("nuccore", plasmid_id, plasmid_file_path) print(f"Pobrano i zapisano sekwencję plazmidu do pliku {plasmid_file_path}.") else: print(f"Plik {plasmid_file_path} istnieje. Wczytywanie danych z lokalnego pliku...") record = load_local_sequence(plasmid_file_path) print(f"Wczytano dane z pliku {plasmid_file_path}.") #%% # Wyświetl etykiety (annotacje) for feature in record.features: print(f"Type: {feature.type}") print(f"Location: {feature.location}") if 'gene' in feature.qualifiers: print(f"Gene: {feature.qualifiers['gene']}") if 'product' in feature.qualifiers: print(f"Product: {feature.qualifiers['product']}") print() #%% #%% # Przejdź przez wszystkie funkcje w rekordzie i wypisz ich szczegółowe informacje for feature in record.features: if feature.type in ["promoter", "RBS","CDS", "protein_bind", "misc_feature", "rep_origin", "terminator"]: print(f"Type: {feature.type}") print(f"Location: {feature.location}") print(f"Strand: {'+' if feature.strand == 1 else '-'}") # Szczegółowe opisy if feature.type == "CDS": gene_name = feature.qualifiers.get('gene', ['Unknown gene'])[0] product = feature.qualifiers.get('product', ['Unknown product'])[0] print(f"Gene Name: {gene_name}") print(f"Product: {product}") elif feature.type == "protein_bind": binding_site = feature.qualifiers.get('bound_moiety', ['Unknown binding site'])[0] print(f"Binding Site: {binding_site}") elif feature.type == "misc_feature": note = feature.qualifiers.get('note', ['No additional information'])[0] print(f"Note: {note}") elif feature.type == "rep_origin": print("This is a replication origin.") # Pobierz sekwencję funkcji feature_seq = record.seq[feature.location.start:feature.location.end] print(f"Sequence ( ): {feature_seq}\n") # Uwzględnij orientację nici (strand) if feature.strand == -1: feature_seq = feature_seq.reverse_complement() print(f"Sequence (^): {feature_seq}\n") # Znajdź i wyświetl sekwencję terminatora T7 for feature in record.features: if feature.type == "terminator" and "T7" in feature.qualifiers.get('note', [''])[0]: print("Terminator T7 znaleziony:") print(f"Type: {feature.type}") print(f"Location: {feature.location}") print(f"Sequence: {record.seq[feature.location.start:feature.location.end]}") print() #%% # Znajdź i wyświetl sekwencję terminatora T7 for feature in record.features: if feature.type == "terminator" and "T7" in feature.qualifiers.get('note', [''])[0]: print("Terminator T7 znaleziony:") print(f"Type: {feature.type}") print(f"Location: {feature.location}") # Pobierz sekwencję terminatora terminator_seq = record.seq[feature.location.start:feature.location.end] # Sprawdź orientację i odwróć/uzupełnij jeśli potrzebne if feature.strand == -1: terminator_seq = terminator_seq.reverse_complement() print(f"(+) Sequence: {terminator_seq}") print() #%% #%% t=record[25:73].seq #%% a = str (t) a #%% class Model: complement = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'} def __init__ (self, seq=None): self.seq = seq def complement_dna(self): complementary_sequence = ''.join(complement[base] for base in self.seq) return complementary_sequence #%% r = Model(a) #%% r.seq #%% r.seq[::-1] #%% r.complement_dna() #%% r.seq #%% complement = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'} complementary_sequence = ''.join(complement[base] for base in r.seq) complementary_sequence #%% #%% # Sekwencja DNA dna_seq = record.seq # Znajdź miejsca cięcia dla NcoI i Bpu1102I ncoI_sites = NcoI.search(dna_seq) bpu1102I_sites = Bpu1102I.search(dna_seq) # Przyjmij pierwsze miejsca cięcia ncoI_site = ncoI_sites[0] bpu1102I_site = bpu1102I_sites[0] # Wyodrębnij fragmenty 20 zasad od miejsc cięcia ncoI_fragment = dna_seq[ncoI_site-16:ncoI_site+4] # 16 zasad przed i 4 po cięciu bpu1102I_fragment = dna_seq[bpu1102I_site:bpu1102I_site+20] # 20 zasad po cięciu # Lepkie końce ncoI_sticky_end = ncoI_fragment[-4:] ncoI_sticky_end_comp = ncoI_sticky_end.complement() bpu1102I_sticky_end = bpu1102I_fragment[:4] bpu1102I_sticky_end_comp = bpu1102I_sticky_end.complement() # Funkcja do wyświetlania dwuniciowego DNA z lepkimi końcami w formacie schodkowym z indeksami dla dłuższej nici def print_sticky_ends(seq1, sticky_end1, seq2, sticky_end2, start1, start2): # Convert sequences to strings seq1_str = str(seq1) sticky_end1_str = str(sticky_end1) comp_seq1_str = str(seq1.complement()) sticky_end1_comp_str = str(sticky_end1.complement()) seq2_str = str(seq2) sticky_end2_str = str(sticky_end2) comp_seq2_str = str(seq2.complement()) sticky_end2_comp_str = str(sticky_end2.complement()) # Add indexes to the ends seq1_with_index = f"{seq1_str[:-1]} -{start1+len(seq1_str)-1}" comp_seq1_with_index = f"{comp_seq1_str[:-1]}" seq2_with_index = f"{start2}- {seq2_str[1:]}" comp_seq2_with_index = f"{comp_seq2_str[1:]}" # Lustrzane odbicie sekwencji comp_seq2_with_index = comp_seq2_with_index[::-1] sticky_end2_comp_str = sticky_end2_comp_str[::-1] # Format the output as requested print(f"5'-{seq1_with_index} {sticky_end1_str}-3' (+)") print(f"3'-{comp_seq1_with_index}{sticky_end1_comp_str}-5'") print() print(f"5'-{sticky_end2_str} {seq2_with_index}-3' (+)") print(f" {sticky_end2_comp_str}{comp_seq2_with_index}-5'") # Wyświetl lepkie końce fragmentów print("Fragmenty po cięciu NcoI i Bpu1102I:") print_sticky_ends(ncoI_fragment[:-4], ncoI_sticky_end, bpu1102I_fragment[4:], bpu1102I_sticky_end, ncoI_site-16, bpu1102I_site) #%% # Projektowanie starterów forward_primer = gfp_seq[:20] # Pierwsze 20 nukleotydów sekwencji GFP reverse_primer = str(Seq(gfp_seq_with_his6[-20:]).reverse_complement()) # Ostatnie 20 nukleotydów + His6 print(f"Forward Primer: {forward_primer}") print(f"Reverse Primer: {reverse_primer}") #%% # Miejsca cięcia restryktazy enzymes = RestrictionBatch([EcoRI, BamHI, HindIII]) #enzymes = RestrictionBatch([BamHI, ]) restriction_sites = enzymes.search(record.seq) #%% # Przeprowadzenie analizy restrykcyjnej analysis = Analysis(AllEnzymes, record.seq) # Wynik analizy results = analysis.full() # Wyświetlanie wyników #for enzyme in results: # if len(results[enzyme]) > 0: # print(f"{enzyme}: {results[enzyme]}") #%% # Utwórz diagram plazmidu diagram = GenomeDiagram.Diagram("PUC19 Plasmid Map") track = diagram.new_track(1, name="Annotated Features", greytrack=True) feature_set = track.new_set() # Dodanie sekwencji kodującej GFP feature_set.add_feature( SeqFeature(FeatureLocation(0, len(gfp_seq)), strand=+1), name="GFP Coding Sequence", label=True, color=colors.lightblue ) # Dodanie His6 tagu feature_set.add_feature( SeqFeature(FeatureLocation(len(gfp_seq), len(gfp_seq_with_his6)), strand=+1), name="His6 Tag", label=True, color=colors.lightgreen ) # Dodanie forward primer feature_set.add_feature( SeqFeature(FeatureLocation(0, len(forward_primer)), strand=+1), name="Forward Primer", label=True, color=colors.orange ) # Dodanie reverse primer feature_set.add_feature( SeqFeature(FeatureLocation(len(gfp_seq_with_his6) - len(reverse_primer), len(gfp_seq_with_his6)), strand=-1), name="Reverse Primer", label=True, color=colors.red ) # Dodanie miejsc cięcia restryktazy for enzyme, sites in restriction_sites.items(): for site in sites: feature_set.add_feature( SeqFeature(FeatureLocation(site, site + 1), strand=0), name=enzyme, label=True, color=colors.purple ) # Rysowanie diagramu diagram.draw(format="circular", circular=True, pagesize='A4', start=0, end=len(record), circle_core=0.5) diagram.write("pdf/plasmid_map.pdf", "PDF") print("Zapisano diagram plazmidu do pliku 'plasmid_map.pdf'")