2024-06-09 14:56:13 +00:00
|
|
|
#! /usr/bin/env python3
|
|
|
|
# vim:fenc=utf-8
|
|
|
|
#
|
|
|
|
# Copyright © 2024 user <user@penguin>
|
|
|
|
#
|
|
|
|
# Distributed under terms of the MIT license.
|
|
|
|
import os
|
|
|
|
from Bio import Entrez, SeqIO
|
|
|
|
from Bio.Seq import Seq
|
|
|
|
from Bio.SeqFeature import SeqFeature, FeatureLocation
|
|
|
|
from Bio.Restriction import RestrictionBatch, EcoRI, BamHI, HindIII
|
|
|
|
from Bio.Graphics import GenomeDiagram
|
|
|
|
from reportlab.lib import colors
|
|
|
|
|
|
|
|
|
|
|
|
#%%
|
|
|
|
# Entrez settings: the contact address is set on the Entrez module itself,
# before any request is made through it.
Entrez.email = "your_email@example.com" # Enter your e-mail address
|
|
|
|
|
|
|
|
# Download a sequence from NCBI and store it locally
def fetch_and_save_sequence(db, id, file_path):
    """Fetch a single GenBank record through Entrez and save it to disk.

    Args:
        db: Entrez database name, forwarded to ``Entrez.efetch``.
        id: Record identifier, forwarded to ``Entrez.efetch``. The name
            shadows the builtin but is kept so keyword callers still work.
        file_path: Destination path; the record is written there in
            GenBank format.

    Returns:
        The record parsed by ``SeqIO.read`` from the Entrez response.
    """
    handle = Entrez.efetch(db=db, id=id, rettype="gb", retmode="text")
    try:
        record = SeqIO.read(handle, "genbank")
    finally:
        # Release the handle even when parsing the response fails.
        handle.close()
    SeqIO.write(record, file_path, "genbank")
    return record
|
|
|
|
|
|
|
|
# Load a sequence from a local file
def load_local_sequence(file_path):
    """Read the single GenBank record stored at *file_path* and return it."""
    with open(file_path, 'r') as genbank_file:
        return SeqIO.read(genbank_file, "genbank")
|
|
|
|
|
2024-06-09 15:39:24 +00:00
|
|
|
#%%
|
|
|
|
|
2024-06-09 14:56:13 +00:00
|
|
|
# Paths of the local GenBank files
gfp_file_path = 'gfp_sequence.gb'
plasmid_file_path = 'plasmid_sequence.gb'

# NCBI identifiers of the two sequences
gfp_id = "U87974"
plasmid_id = "U96626"

# GFP: use the local file when it is present, otherwise download and save it
if os.path.exists(gfp_file_path):
    print(f"Plik {gfp_file_path} istnieje. Wczytywanie danych z lokalnego pliku...")
    gfp_record = load_local_sequence(gfp_file_path)
    print(f"Wczytano dane z pliku {gfp_file_path}.")
else:
    print(f"Plik {gfp_file_path} nie istnieje. Pobieranie z serwera NCBI...")
    gfp_record = fetch_and_save_sequence("nuccore", gfp_id, gfp_file_path)
    print(f"Pobrano i zapisano sekwencję GFP do pliku {gfp_file_path}.")

# Keep the GFP sequence as a plain string and show its first 60 characters
gfp_seq = str(gfp_record.seq)
print(f"GFP Sequence: {gfp_seq[:60]}...")

# Plasmid: same local-file-first approach as for GFP
if os.path.exists(plasmid_file_path):
    print(f"Plik {plasmid_file_path} istnieje. Wczytywanie danych z lokalnego pliku...")
    record = load_local_sequence(plasmid_file_path)
    print(f"Wczytano dane z pliku {plasmid_file_path}.")
else:
    print(f"Plik {plasmid_file_path} nie istnieje. Pobieranie z serwera NCBI...")
    record = fetch_and_save_sequence("nuccore", plasmid_id, plasmid_file_path)
    print(f"Pobrano i zapisano sekwencję plazmidu do pliku {plasmid_file_path}.")
|
|
|
|
#%%
|
|
|
|
# His6 coding sequence that is appended to the GFP sequence below
his6_tag = "CACCACCACCACCACCAC" # Sequence encoding 6x histidine (His6): six CAC codons, 18 nt
|
|
|
|
|
2024-06-09 15:39:24 +00:00
|
|
|
#%%
|
2024-06-09 14:56:13 +00:00
|
|
|
# GFP nucleotide sequence with the His6 coding tag appended at its end.
# `t` is bound to the same string as `gfp_seq_with_his6`.
# NOTE(review): `t` is not referenced anywhere else in the visible source.
t = gfp_seq_with_his6 = gfp_seq + his6_tag
|
2024-06-09 14:56:13 +00:00
|
|
|
|
2024-06-09 15:39:24 +00:00
|
|
|
#%%
|
2024-06-09 14:56:13 +00:00
|
|
|
# Primer design
forward_primer = gfp_seq[:20]  # First 20 nucleotides of the GFP sequence

# Reverse primer: the last 20 nucleotides of GFP followed by the His6 tag,
# reverse-complemented. Slicing the last 20 nt of gfp_seq_with_his6 instead
# would yield the 18-nt tag plus only 2 nt of GFP, i.e. a primer with almost
# no GFP-matching region.
reverse_primer = str(Seq(gfp_seq[-20:] + his6_tag).reverse_complement())

print(f"Forward Primer: {forward_primer}")
print(f"Reverse Primer: {reverse_primer}")
|
|
|
|
|
2024-06-09 15:39:24 +00:00
|
|
|
|
|
|
|
#%%
|
2024-06-09 14:56:13 +00:00
|
|
|
# Restriction enzyme cut sites
enzymes = RestrictionBatch([EcoRI, BamHI, HindIII])

# search() treats the sequence as linear unless told otherwise, which misses
# recognition sites spanning the origin of a circular molecule. Follow the
# topology recorded in the GenBank record; records without that annotation
# are searched as linear, exactly as before.
restriction_sites = enzymes.search(
    record.seq,
    linear=record.annotations.get("topology", "linear") != "circular",
)
|
|
|
|
|
2024-06-09 15:39:24 +00:00
|
|
|
#%%
|
2024-06-09 14:56:13 +00:00
|
|
|
# Create the plasmid diagram: one diagram object, one track on it (track
# level 1, greytrack enabled) and one feature set on that track. All
# annotations added below go into `feature_set`.
diagram = GenomeDiagram.Diagram("pET-28a(+) Plasmid Map")
track = diagram.new_track(1, name="Annotated Features", greytrack=True)
feature_set = track.new_set()
|
|
|
|
|
|
|
|
# The strand of every feature is set on its FeatureLocation: recent Biopython
# releases no longer accept a `strand` argument on SeqFeature itself, while
# FeatureLocation accepts it in old and new releases alike.

# GFP coding sequence
feature_set.add_feature(
    SeqFeature(FeatureLocation(0, len(gfp_seq), strand=+1)),
    name="GFP Coding Sequence",
    label=True,
    color=colors.lightblue,
)

# His6 tag, directly after the GFP sequence
feature_set.add_feature(
    SeqFeature(FeatureLocation(len(gfp_seq), len(gfp_seq_with_his6), strand=+1)),
    name="His6 Tag",
    label=True,
    color=colors.lightgreen,
)

# Forward primer, at the start of the GFP sequence
feature_set.add_feature(
    SeqFeature(FeatureLocation(0, len(forward_primer), strand=+1)),
    name="Forward Primer",
    label=True,
    color=colors.orange,
)

# Reverse primer, at the end of the tagged sequence on the reverse strand
feature_set.add_feature(
    SeqFeature(
        FeatureLocation(
            len(gfp_seq_with_his6) - len(reverse_primer),
            len(gfp_seq_with_his6),
            strand=-1,
        )
    ),
    name="Reverse Primer",
    label=True,
    color=colors.red,
)
|
|
|
|
|
|
|
|
# Restriction enzyme cut sites: one single-base, strandless feature per site.
for enzyme, sites in restriction_sites.items():
    for site in sites:
        feature_set.add_feature(
            # Strand goes on the location; recent Biopython releases no
            # longer accept a `strand` argument on SeqFeature itself.
            SeqFeature(FeatureLocation(site, site + 1, strand=0)),
            # The dictionary keys are enzyme objects; the label must be text.
            name=str(enzyme),
            label=True,
            color=colors.purple,
        )
|
|
|
|
|
|
|
|
# Draw the diagram as a circular map covering the whole plasmid record
diagram.draw(format="circular", circular=True, pagesize='A4', start=0, end=len(record), circle_core=0.5)

# Make sure the output directory exists before writing into it; on a fresh
# checkout "pdf/" is missing and the write would fail.
os.makedirs("pdf", exist_ok=True)
diagram.write("pdf/plasmid_map.pdf", "PDF")

print("Zapisano diagram plazmidu do pliku 'plasmid_map.pdf'")
|
|
|
|
|