334 lines
10 KiB
Python
334 lines
10 KiB
Python
#! /usr/bin/env python3
|
||
# vim:fenc=utf-8
|
||
#
|
||
# Copyright © 2024 user <user@penguin>
|
||
#
|
||
# Distributed under terms of the MIT license.
|
||
import os
|
||
from Bio import Entrez, SeqIO
|
||
from Bio.Seq import Seq
|
||
from Bio.SeqFeature import SeqFeature, FeatureLocation
|
||
|
||
from Bio.Restriction import RestrictionBatch, EcoRI, BamHI, HindIII, Bpu1102I
|
||
from Bio.Restriction import AllEnzymes, Analysis
|
||
|
||
from Bio.Graphics import GenomeDiagram
|
||
from reportlab.lib import colors
|
||
|
||
#%%
|
||
# Ustawienia Entrez
|
||
Entrez.email = "your_email@example.com" # Wpisz swój adres email
|
||
|
||
# Funkcja do pobierania sekwencji z NCBI i zapisywania jej lokalnie
|
||
def fetch_and_save_sequence(db, id, file_path):
|
||
handle = Entrez.efetch(db=db, id=id, rettype="gb", retmode="text")
|
||
record = SeqIO.read(handle, "genbank")
|
||
handle.close()
|
||
SeqIO.write(record, file_path, "genbank")
|
||
return record
|
||
|
||
# Funkcja do wczytywania sekwencji z lokalnego pliku
|
||
def load_local_sequence(file_path):
|
||
with open(file_path, 'r') as file:
|
||
record = SeqIO.read(file, "genbank")
|
||
return record
|
||
|
||
#%%
|
||
|
||
# ID dla sekwencji w NCBI
|
||
gfp_id = "U87974"
|
||
plasmid_id = "pET-28a(+)"
|
||
|
||
# Åcieżki do lokalnych plików
|
||
gfp_file_path = gfp_id+'.gb'
|
||
plasmid_file_path = plasmid_id+'.gb'
|
||
|
||
#%%
|
||
# Sprawdzanie i pobieranie sekwencji GFP
|
||
if not os.path.exists(gfp_file_path):
|
||
print(f"Plik {gfp_file_path} nie istnieje. Pobieranie z serwera NCBI...")
|
||
gfp_record = fetch_and_save_sequence("nuccore", gfp_id, gfp_file_path)
|
||
print(f"Pobrano i zapisano sekwencjÄ GFP do pliku {gfp_file_path}.")
|
||
else:
|
||
print(f"Plik {gfp_file_path} istnieje. Wczytywanie danych z lokalnego pliku...")
|
||
gfp_record = load_local_sequence(gfp_file_path)
|
||
print(f"Wczytano dane z pliku {gfp_file_path}.")
|
||
|
||
gfp_seq = str(gfp_record.seq)
|
||
print(f"GFP Sequence: {gfp_seq[:60]}...")
|
||
|
||
#%%
|
||
# Sprawdzanie i pobieranie sekwencji plazmidu
|
||
if not os.path.exists(plasmid_file_path):
|
||
print(f"Plik {plasmid_file_path} nie istnieje. Pobieranie z serwera NCBI...")
|
||
record = fetch_and_save_sequence("nuccore", plasmid_id, plasmid_file_path)
|
||
print(f"Pobrano i zapisano sekwencjÄ plazmidu do pliku {plasmid_file_path}.")
|
||
else:
|
||
print(f"Plik {plasmid_file_path} istnieje. Wczytywanie danych z lokalnego pliku...")
|
||
record = load_local_sequence(plasmid_file_path)
|
||
print(f"Wczytano dane z pliku {plasmid_file_path}.")
|
||
|
||
#%%
|
||
# WyÅwietl etykiety (annotacje)
|
||
for feature in record.features:
|
||
print(f"Type: {feature.type}")
|
||
print(f"Location: {feature.location}")
|
||
if 'gene' in feature.qualifiers:
|
||
print(f"Gene: {feature.qualifiers['gene']}")
|
||
if 'product' in feature.qualifiers:
|
||
print(f"Product: {feature.qualifiers['product']}")
|
||
print()
|
||
#%%
|
||
|
||
|
||
#%%
|
||
|
||
# Przejdź przez wszystkie funkcje w rekordzie i wypisz ich szczegóÅowe informacje
|
||
for feature in record.features:
|
||
if feature.type in ["promoter", "RBS","CDS", "protein_bind", "misc_feature", "rep_origin", "terminator"]:
|
||
print(f"Type: {feature.type}")
|
||
print(f"Location: {feature.location}")
|
||
print(f"Strand: {'+' if feature.strand == 1 else '-'}")
|
||
|
||
# SzczegóÅowe opisy
|
||
if feature.type == "CDS":
|
||
gene_name = feature.qualifiers.get('gene', ['Unknown gene'])[0]
|
||
product = feature.qualifiers.get('product', ['Unknown product'])[0]
|
||
print(f"Gene Name: {gene_name}")
|
||
print(f"Product: {product}")
|
||
elif feature.type == "protein_bind":
|
||
binding_site = feature.qualifiers.get('bound_moiety', ['Unknown binding site'])[0]
|
||
print(f"Binding Site: {binding_site}")
|
||
elif feature.type == "misc_feature":
|
||
note = feature.qualifiers.get('note', ['No additional information'])[0]
|
||
print(f"Note: {note}")
|
||
elif feature.type == "rep_origin":
|
||
print("This is a replication origin.")
|
||
|
||
# Pobierz sekwencjÄ funkcji
|
||
feature_seq = record.seq[feature.location.start:feature.location.end]
|
||
|
||
|
||
print(f"Sequence ( ): {feature_seq}\n")
|
||
# UwzglÄdnij orientacjÄ nici (strand)
|
||
if feature.strand == -1:
|
||
feature_seq = feature_seq.reverse_complement()
|
||
print(f"Sequence (^): {feature_seq}\n")
|
||
|
||
|
||
|
||
# Znajdź i wyÅwietl sekwencjÄ terminatora T7
|
||
for feature in record.features:
|
||
if feature.type == "terminator" and "T7" in feature.qualifiers.get('note', [''])[0]:
|
||
print("Terminator T7 znaleziony:")
|
||
print(f"Type: {feature.type}")
|
||
print(f"Location: {feature.location}")
|
||
print(f"Sequence: {record.seq[feature.location.start:feature.location.end]}")
|
||
print()
|
||
|
||
|
||
|
||
|
||
#%%
|
||
# Znajdź i wyÅwietl sekwencjÄ terminatora T7
|
||
for feature in record.features:
|
||
if feature.type == "terminator" and "T7" in feature.qualifiers.get('note', [''])[0]:
|
||
print("Terminator T7 znaleziony:")
|
||
print(f"Type: {feature.type}")
|
||
print(f"Location: {feature.location}")
|
||
|
||
# Pobierz sekwencjÄ terminatora
|
||
terminator_seq = record.seq[feature.location.start:feature.location.end]
|
||
|
||
# Sprawdź orientacjÄ i odwróÄ/uzupeÅnij jeÅli potrzebne
|
||
if feature.strand == -1:
|
||
terminator_seq = terminator_seq.reverse_complement()
|
||
|
||
print(f"(+) Sequence: {terminator_seq}")
|
||
print()
|
||
|
||
#%%
|
||
|
||
|
||
#%%
|
||
t=record[25:73].seq
|
||
|
||
|
||
#%%
|
||
a = str (t)
|
||
a
|
||
|
||
#%%
|
||
class Model:
|
||
|
||
complement = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
|
||
|
||
def __init__ (self, seq=None):
|
||
self.seq = seq
|
||
|
||
def complement_dna(self):
|
||
|
||
complementary_sequence = ''.join(self.complement[base] for base in self.seq)
|
||
return complementary_sequence
|
||
|
||
#%%
|
||
r = Model(a)
|
||
|
||
#%%
|
||
r.seq
|
||
|
||
#%%
|
||
r.seq[::-1]
|
||
|
||
#%%
|
||
r.complement_dna()
|
||
|
||
#%%
|
||
r.seq
|
||
#%%
|
||
complement = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
|
||
complementary_sequence = ''.join(complement[base] for base in r.seq)
|
||
complementary_sequence
|
||
|
||
#%%
|
||
|
||
|
||
#%%
|
||
# Sekwencja DNA
|
||
dna_seq = record.seq
|
||
|
||
# Znajdź miejsca ciÄcia dla NcoI i Bpu1102I
|
||
ncoI_sites = NcoI.search(dna_seq)
|
||
bpu1102I_sites = Bpu1102I.search(dna_seq)
|
||
|
||
# Przyjmij pierwsze miejsca ciÄcia
|
||
ncoI_site = ncoI_sites[0]
|
||
bpu1102I_site = bpu1102I_sites[0]
|
||
|
||
# WyodrÄbnij fragmenty 20 zasad od miejsc ciÄcia
|
||
ncoI_fragment = dna_seq[ncoI_site-16:ncoI_site+4] # 16 zasad przed i 4 po ciÄciu
|
||
bpu1102I_fragment = dna_seq[bpu1102I_site:bpu1102I_site+20] # 20 zasad po ciÄciu
|
||
|
||
# Lepkie koÅce
|
||
ncoI_sticky_end = ncoI_fragment[-4:]
|
||
ncoI_sticky_end_comp = ncoI_sticky_end.complement()
|
||
|
||
bpu1102I_sticky_end = bpu1102I_fragment[:4]
|
||
bpu1102I_sticky_end_comp = bpu1102I_sticky_end.complement()
|
||
|
||
# Funkcja do wyÅwietlania dwuniciowego DNA z lepkimi koÅcami w formacie schodkowym z indeksami dla dÅuższej nici
|
||
def print_sticky_ends(seq1, sticky_end1, seq2, sticky_end2, start1, start2):
|
||
# Convert sequences to strings
|
||
seq1_str = str(seq1)
|
||
sticky_end1_str = str(sticky_end1)
|
||
comp_seq1_str = str(seq1.complement())
|
||
sticky_end1_comp_str = str(sticky_end1.complement())
|
||
|
||
seq2_str = str(seq2)
|
||
sticky_end2_str = str(sticky_end2)
|
||
comp_seq2_str = str(seq2.complement())
|
||
sticky_end2_comp_str = str(sticky_end2.complement())
|
||
|
||
# Add indexes to the ends
|
||
seq1_with_index = f"{seq1_str[:-1]} -{start1+len(seq1_str)-1}"
|
||
comp_seq1_with_index = f"{comp_seq1_str[:-1]}"
|
||
|
||
seq2_with_index = f"{start2}- {seq2_str[1:]}"
|
||
comp_seq2_with_index = f"{comp_seq2_str[1:]}"
|
||
|
||
# Lustrzane odbicie sekwencji
|
||
comp_seq2_with_index = comp_seq2_with_index[::-1]
|
||
sticky_end2_comp_str = sticky_end2_comp_str[::-1]
|
||
|
||
# Format the output as requested
|
||
print(f"5'-{seq1_with_index} {sticky_end1_str}-3' (+)")
|
||
print(f"3'-{comp_seq1_with_index}{sticky_end1_comp_str}-5'")
|
||
print()
|
||
print(f"5'-{sticky_end2_str} {seq2_with_index}-3' (+)")
|
||
print(f" {sticky_end2_comp_str}{comp_seq2_with_index}-5'")
|
||
|
||
# WyÅwietl lepkie koÅce fragmentów
|
||
print("Fragmenty po ciÄciu NcoI i Bpu1102I:")
|
||
print_sticky_ends(ncoI_fragment[:-4], ncoI_sticky_end, bpu1102I_fragment[4:], bpu1102I_sticky_end, ncoI_site-16, bpu1102I_site)
|
||
|
||
#%%
|
||
# Projektowanie starterów
|
||
forward_primer = gfp_seq[:20] # Pierwsze 20 nukleotydów sekwencji GFP
|
||
reverse_primer = str(Seq(gfp_seq_with_his6[-20:]).reverse_complement()) # Ostatnie 20 nukleotydów + His6
|
||
|
||
print(f"Forward Primer: {forward_primer}")
|
||
print(f"Reverse Primer: {reverse_primer}")
|
||
|
||
#%%
|
||
# Miejsca ciÄcia restryktazy
|
||
enzymes = RestrictionBatch([EcoRI, BamHI, HindIII])
|
||
#enzymes = RestrictionBatch([BamHI, ])
|
||
restriction_sites = enzymes.search(record.seq)
|
||
|
||
#%%
|
||
# Przeprowadzenie analizy restrykcyjnej
|
||
analysis = Analysis(AllEnzymes, record.seq)
|
||
|
||
# Wynik analizy
|
||
results = analysis.full()
|
||
|
||
# WyÅwietlanie wyników
|
||
#for enzyme in results:
|
||
# if len(results[enzyme]) > 0:
|
||
# print(f"{enzyme}: {results[enzyme]}")
|
||
|
||
#%%
|
||
# Utwórz diagram plazmidu
|
||
diagram = GenomeDiagram.Diagram("PUC19 Plasmid Map")
|
||
track = diagram.new_track(1, name="Annotated Features", greytrack=True)
|
||
feature_set = track.new_set()
|
||
|
||
# Dodanie sekwencji kodujÄ
cej GFP
|
||
feature_set.add_feature(
|
||
SeqFeature(FeatureLocation(0, len(gfp_seq)), strand=+1),
|
||
name="GFP Coding Sequence",
|
||
label=True,
|
||
color=colors.lightblue
|
||
)
|
||
|
||
# Dodanie His6 tagu
|
||
feature_set.add_feature(
|
||
SeqFeature(FeatureLocation(len(gfp_seq), len(gfp_seq_with_his6)), strand=+1),
|
||
name="His6 Tag",
|
||
label=True,
|
||
color=colors.lightgreen
|
||
)
|
||
|
||
# Dodanie forward primer
|
||
feature_set.add_feature(
|
||
SeqFeature(FeatureLocation(0, len(forward_primer)), strand=+1),
|
||
name="Forward Primer",
|
||
label=True,
|
||
color=colors.orange
|
||
)
|
||
|
||
# Dodanie reverse primer
|
||
feature_set.add_feature(
|
||
SeqFeature(FeatureLocation(len(gfp_seq_with_his6) - len(reverse_primer), len(gfp_seq_with_his6)), strand=-1),
|
||
name="Reverse Primer",
|
||
label=True,
|
||
color=colors.red
|
||
)
|
||
|
||
# Dodanie miejsc ciÄcia restryktazy
|
||
for enzyme, sites in restriction_sites.items():
|
||
for site in sites:
|
||
feature_set.add_feature(
|
||
SeqFeature(FeatureLocation(site, site + 1), strand=0),
|
||
name=enzyme,
|
||
label=True,
|
||
color=colors.purple
|
||
)
|
||
|
||
# Rysowanie diagramu
|
||
diagram.draw(format="circular", circular=True, pagesize='A4', start=0, end=len(record), circle_core=0.5)
|
||
diagram.write("pdf/plasmid_map.pdf", "PDF")
|
||
|
||
print("Zapisano diagram plazmidu do pliku 'plasmid_map.pdf'")
|
||
|