This commit is contained in:
mpabi 2024-06-09 16:56:13 +02:00
commit f5b3d446e1
4 changed files with 405 additions and 0 deletions

134
gfp-old.py Normal file
View File

@ -0,0 +1,134 @@
#! /usr/bin/env python3
# vim:fenc=utf-8
#
# Copyright © 2024 user <user@penguin>
#
# Distributed under terms of the MIT license.
import os
from Bio import Entrez, SeqIO
from Bio.Seq import Seq
from Bio.SeqFeature import SeqFeature, FeatureLocation
from Bio.Restriction import RestrictionBatch, EcoRI, BamHI, HindIII
from Bio.Graphics import GenomeDiagram
from reportlab.lib import colors
#%%
# Entrez settings: the address assigned here is a placeholder to be replaced
Entrez.email = "your_email@example.com" # Enter your own email address
# Fetch a sequence from NCBI and save it locally
def fetch_and_save_sequence(db, id, file_path):
    """Download a GenBank record through Entrez and write a copy to disk.

    Args:
        db: Entrez database name forwarded to ``Entrez.efetch``.
        id: Identifier of the record to fetch. The name shadows the
            builtin but is kept so existing keyword callers still work.
        file_path: Destination path for the GenBank-format copy.

    Returns:
        The record parsed by ``SeqIO.read`` from the Entrez response.
    """
    handle = Entrez.efetch(db=db, id=id, rettype="gb", retmode="text")
    try:
        record = SeqIO.read(handle, "genbank")
    finally:
        # Release the handle even when parsing the response fails.
        handle.close()
    SeqIO.write(record, file_path, "genbank")
    return record
# Load a sequence from a local file
def load_local_sequence(file_path):
    """Parse the single GenBank record stored at ``file_path``.

    Args:
        file_path: Path of the GenBank file to read.

    Returns:
        The record parsed by ``SeqIO.read``.
    """
    with open(file_path, 'r') as genbank_file:
        return SeqIO.read(genbank_file, "genbank")
# Paths of the locally cached GenBank files
gfp_file_path = 'gfp_sequence.gb'
plasmid_file_path = 'plasmid_sequence.gb'
# NCBI identifiers of the sequences
gfp_id = "U87974"
# NOTE(review): the plasmid_sequence.gb committed with this script describes
# U96626 as "Mus musculus chondroadherin gene", not a plasmid - confirm the
# intended accession before relying on the generated map.
plasmid_id = "U96626"


def _load_or_fetch(accession, file_path, label):
    """Return the record cached at ``file_path``, fetching it from NCBI first if absent.

    Args:
        accession: Identifier passed to ``fetch_and_save_sequence``.
        file_path: Local cache location that is checked and, if missing, written.
        label: Word inserted into the "downloaded and saved" progress message.

    Returns:
        The record returned by the fetch or load helper.
    """
    if not os.path.exists(file_path):
        print(f"Plik {file_path} nie istnieje. Pobieranie z serwera NCBI...")
        loaded = fetch_and_save_sequence("nuccore", accession, file_path)
        print(f"Pobrano i zapisano sekwencję {label} do pliku {file_path}.")
    else:
        print(f"Plik {file_path} istnieje. Wczytywanie danych z lokalnego pliku...")
        loaded = load_local_sequence(file_path)
        print(f"Wczytano dane z pliku {file_path}.")
    return loaded


# Load (or download) the GFP sequence and show its first 60 characters
gfp_record = _load_or_fetch(gfp_id, gfp_file_path, "GFP")
gfp_seq = str(gfp_record.seq)
print(f"GFP Sequence: {gfp_seq[:60]}...")
# Load (or download) the plasmid sequence; later code reads it as `record`
record = _load_or_fetch(plasmid_id, plasmid_file_path, "plazmidu")
#%%
# Coding sequence of the His6 tag
his6_tag = "CACCACCACCACCACCAC"  # Six CAC codons, i.e. 6x histidine (His6)
# GFP sequence with the His6 tag appended at its end
gfp_seq_with_his6 = gfp_seq + his6_tag
# Primer design
forward_primer = gfp_seq[:20]  # First 20 nucleotides of the GFP sequence
# Reverse primer: the last 20 nucleotides of GFP followed by the whole His6
# tag, reverse-complemented. Slicing the last 20 nt of the tagged sequence
# instead would keep only 2 nt of GFP, because the tag alone is 18 nt long.
reverse_primer = str(Seq(gfp_seq[-20:] + his6_tag).reverse_complement())
print(f"Forward Primer: {forward_primer}")
print(f"Reverse Primer: {reverse_primer}")
# Restriction enzyme cut sites found in the plasmid record
enzymes = RestrictionBatch([EcoRI, BamHI, HindIII])
restriction_sites = enzymes.search(record.seq)
# Build the plasmid diagram
diagram = GenomeDiagram.Diagram("pET-28a(+) Plasmid Map")
track = diagram.new_track(1, name="Annotated Features", greytrack=True)
feature_set = track.new_set()


def _add_annotation(start, end, strand, name, color):
    """Add one labelled feature covering ``start``..``end`` to ``feature_set``.

    The strand is set on the location object rather than passed to
    ``SeqFeature``, whose ``strand`` argument is deprecated in recent
    Biopython releases.
    """
    feature_set.add_feature(
        SeqFeature(FeatureLocation(start, end, strand=strand)),
        name=name,
        label=True,
        color=color,
    )


# GFP coding sequence
_add_annotation(0, len(gfp_seq), +1, "GFP Coding Sequence", colors.lightblue)
# His6 tag directly after GFP
_add_annotation(len(gfp_seq), len(gfp_seq_with_his6), +1, "His6 Tag", colors.lightgreen)
# Forward primer at the start of GFP
_add_annotation(0, len(forward_primer), +1, "Forward Primer", colors.orange)
# Reverse primer on the opposite strand, ending at the end of the tag
_add_annotation(
    len(gfp_seq_with_his6) - len(reverse_primer),
    len(gfp_seq_with_his6),
    -1,
    "Reverse Primer",
    colors.red,
)
# Restriction enzyme cut sites
for enzyme, sites in restriction_sites.items():
    # The search result is keyed by enzyme objects; the label must be text.
    enzyme_label = str(enzyme)
    for site in sites:
        _add_annotation(site, site + 1, 0, enzyme_label, colors.purple)
# Draw the diagram
diagram.draw(format="circular", circular=True, pagesize='A4', start=0, end=len(record), circle_core=0.5)
output_path = "pdf/plasmid_map.pdf"
# Create the output directory first so the write cannot fail on a fresh checkout.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
diagram.write(output_path, "PDF")
print("Zapisano diagram plazmidu do pliku 'plasmid_map.pdf'")

66
gfp_sequence.gb Normal file
View File

@ -0,0 +1,66 @@
LOCUS SCU87974 818 bp mRNA linear SYN 24-FEB-1997
DEFINITION Synthetic construct modified green fluorescent protein GFP5-ER
(mgfp5-ER) mRNA, complete cds.
ACCESSION U87974
VERSION U87974.1
KEYWORDS .
SOURCE synthetic construct
ORGANISM synthetic construct
other sequences; artificial sequences.
REFERENCE 1 (bases 1 to 818)
AUTHORS Siemering,K.R., Golbik,R., Sever,R. and Haseloff,J.
TITLE Mutations that suppress the thermosensitivity of green fluorescent
protein
JOURNAL Curr. Biol. 6 (12), 1653-1663 (1996)
PUBMED 8994830
REFERENCE 2 (bases 1 to 818)
AUTHORS Haseloff,J., Siemering,K.R., Prasher,D. and Hodge,S.
TITLE Removal of a cryptic intron and subcellular localisation of green
fluorescent protein are required to mark transgenic Arabidopsis
plants brightly
JOURNAL Proc. Natl. Acad. Sci. U.S.A. (1997) In press
REFERENCE 3 (bases 1 to 818)
AUTHORS Siemering,K.R., Golbik,R., Sever,R. and Haseloff,J.
TITLE Direct Submission
JOURNAL Submitted (31-JAN-1997) Division of Cell Biology, MRC Laboratory of
Molecular Biology, Hills Road, Cambridge CB2 2QH, UK
FEATURES Location/Qualifiers
source 1..818
/organism="synthetic construct"
/mol_type="mRNA"
/db_xref="taxon:32630"
gene 1..818
/gene="mgfp5-ER"
CDS 21..812
/gene="mgfp5-ER"
/note="contains codon usage changes that disrupt a cryptic
plant intron, mutations that increase the thermotolerance
and change the spectral characteristics of the protein, and
sequences that code for signal peptides that result in
retention of the protein in the plant endoplasmic
reticulum"
/codon_start=1
/transl_table=11
/product="modified green fluorescent protein GFP5-ER"
/protein_id="AAB47999.1"
/translation="MKTNLFLFLIFSLLLSLSSAEFSKGEELFTGVVPILVELDGDVNG
HKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDF
FKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLE
YNYNSHNVYIMADKQKNGIKANFKTRHNIEDGGVQLADHYQQNTPIGDGPVLLPDNHYL
STQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYKHDEL"
ORIGIN
1 ggatccaagg agatataaca atgaagacta atctttttct ctttctcatc ttttcacttc
61 tcctatcatt atcctcggcc gaattcagta aaggagaaga acttttcact ggagttgtcc
121 caattcttgt tgaattagat ggtgatgtta atgggcacaa attttctgtc agtggagagg
181 gtgaaggtga tgcaacatac ggaaaactta cccttaaatt tatttgcact actggaaaac
241 tacctgttcc atggccaaca cttgtcacta ctttctctta tggtgttcaa tgcttttcaa
301 gatacccaga tcatatgaag cggcacgact tcttcaagag cgccatgcct gagggatacg
361 tgcaggagag gaccatcttc ttcaaggacg acgggaacta caagacacgt gctgaagtca
421 agtttgaggg agacaccctc gtcaacagga tcgagcttaa gggaatcgat ttcaaggagg
481 acggaaacat cctcggccac aagttggaat acaactacaa ctcccacaac gtatacatca
541 tggccgacaa gcaaaagaac ggcatcaaag ccaacttcaa gacccgccac aacatcgaag
601 acggcggcgt gcaactcgct gatcattatc aacaaaatac tccaattggc gatggccctg
661 tccttttacc agacaaccat tacctgtcca cacaatctgc cctttcgaaa gatcccaacg
721 aaaagagaga ccacatggtc cttcttgagt ttgtaacagc tgctgggatt acacatggca
781 tggatgaact atacaaacat gatgagcttt aagagctc
//

74
pdf/plasmid_map.pdf Normal file

File diff suppressed because one or more lines are too long

131
plasmid_sequence.gb Normal file
View File

@ -0,0 +1,131 @@
LOCUS MMU96626 4742 bp DNA linear ROD 07-FEB-1998
DEFINITION Mus musculus chondroadherin gene, complete cds.
ACCESSION U96626
VERSION U96626.1
KEYWORDS .
SOURCE Mus musculus (house mouse)
ORGANISM Mus musculus
Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
Mammalia; Eutheria; Euarchontoglires; Glires; Rodentia; Myomorpha;
Muroidea; Muridae; Murinae; Mus; Mus.
REFERENCE 1 (bases 1 to 4742)
AUTHORS Landgren,C., Beier,D.R., Fassler,R., Heinegard,D. and Sommarin,Y.
TITLE The mouse chondroadherin gene: characterization and chromosomal
localization
JOURNAL Genomics 47 (1), 84-91 (1998)
PUBMED 9465299
REFERENCE 2 (bases 1 to 4742)
AUTHORS Landgren,C. and Sommarin,Y.
TITLE Direct Submission
JOURNAL Submitted (07-APR-1997) Connective Tissue, Cell and Molecular
Biology, Lund 22100, Sweden
FEATURES Location/Qualifiers
source 1..4742
/organism="Mus musculus"
/mol_type="genomic DNA"
/db_xref="taxon:10090"
/chromosome="11"
/map="between D11Mit36 and D11Mit10"
mRNA join(671..1490,3420..3583,3831..3976,4202..4742)
/product="chondroadherin"
exon 671..1490
/number=1
CDS join(720..1490,3420..3583,3831..3972)
/codon_start=1
/product="chondroadherin"
/protein_id="AAC39963.1"
/translation="MARALLFSLVFLAILLPALAACPQNCHCHGDLQHVICDKVGLQKI
PKVSETTKLLNLQRNNFPVLAANSFRTMPNLVSLHLQHCNIREVAAGAFRGLKQLIYLY
LSHNDIRVLRAGAFDDLTELTYLYLDHNKVSELPRGLLSPLVNLFILQLNNNKIRELRA
GAFQGAKDLRWLYLSENALSSLQPGSLDDVENLAKFHLDKNQLSSYPSAALSKLRVVEE
LKLSHNPLKSIPDNAFQSFGRYLETLWLDNTNLEKFSDAAFSGVTTLKHVHLDNNRLNQ
LPSSFPFDNLETLTLTNNPWKCTCQLRGLRRWLEAKASRPDATCSSPAKFKGQRIRDTD
ALRSCKSPTKRSKKAGRH"
exon 3420..3583
/number=2
exon 3831..3976
/number=3
exon 4202..4742
/number=4
ORIGIN
1 gagctcttac gggcctggtg ccactgggct ccgagaaggg gcagagccaa acgcacggct
61 gtcagctagc ctctgcaacc agctccccca cctccttggg ataaactgag gaacccagaa
121 gcgggagccc aacccacagc agctctcacg ctccgcctgc gccgcacaac agtcccatta
181 aagcgccgcc ggctggccga ccgcggtgag acgcatccgg ctgtcgggcc ccacttcctc
241 cctcccggag tccagggtga cctgtctgcc aagggtgtat gggggaagga gacgtagaga
301 actcaaactt gagcaaataa ataagttctg ggaacacttc cctctgccca gtggaaattc
361 agaagcccct cgacacacct atcaccgtcc accccacctc ggggtgttgg tccagataga
421 ggagggtagg ggaaggtgca gcataatgtt tgcaaacagg aaccaagggg ttggggttca
481 ggggaagggc cctcagccct acacacggtc tcctgctgtg aaaagaggcc cccagccatc
541 gaggatgggg acgcatctct gggcgggaag gggttaaatc agtggcttcg gtgctccacg
601 tagtagctgg ctccgctgcc aactgcggtc aaggctgccc tataaatggg ccgggagacc
661 cgagagtcga ggacttgtcg ctgccttagc ccccagccca ggctcaaggc gttctaacca
721 tggcccgcgc gctcttattc agtctggtct ttcttgccat cctcctgcct gcgctagccg
781 cctgccccca aaactgccac tgccatggag atctgcagca tgtcatctgc gacaaggtgg
841 ggctgcagaa gatccccaag gtatcagaga caaccaaact gctcaatctc cagcgcaaca
901 acttcccggt gctggctgcc aactcgtttc ggaccatgcc gaacctggtc tccctgcacc
961 tgcaacactg caacatccgc gaggtggcgg ctggtgcctt ccgaggcctg aagcagctta
1021 tctacctgta cctgtcccac aacgacatcc gggtattgcg agctggagcc ttcgacgacc
1081 tgactgaact cacttacctc tatctagacc acaacaaagt gtcggaactg ccccgggggt
1141 tgctctctcc tctggtcaac ctcttcatct tgcaactcaa caacaacaaa atccgagagc
1201 tgcgtgctgg agctttccag ggggccaagg acctgcgctg gctctacctg tcagaaaatg
1261 ccctcagttc cctgcagcct ggttccctgg atgatgtgga gaacctagcc aagttccacc
1321 tggacaagaa ccagctgtct agctacccct cagccgccct gagcaaactt cgggtggtgg
1381 aggagctgaa gctgtctcac aaccctctga agagcatccc agacaatgcc ttccagtcct
1441 tcggtagata tctggagacc ctctggctgg ataacaccaa cctggagaag gtaagtgccc
1501 cagctgcagt tctcccggct ctttctagga tgccacccca ggaaccacat cgggacagca
1561 ttccctcctc ctggactcct tgtcactcag attatcctcc aaagcaaggg gcctttgcca
1621 cttctctctg cattgacaca cacccctccc catccttctc acccttacct ggtcccgagg
1681 cttctcagac agggctctct tgggactcct tgtaagagat ggaactgttc tggtcccaca
1741 aatggcctct ggggaggcat ctgccccaga ccccacggtt aacagcagac acctcttcct
1801 gttggacttt ccacccctct cttaccagtc tcctgcaagc ctaccatttc ataaaggaag
1861 gtttgtctac tgaggcacct tccagaagct gagagaacac aaagagtgcc tgttcccagt
1921 gggtgctggc ctgtgttgct aaggtaaagt gagaggccag aggatgaatt ggggagcagg
1981 taccttaagg agaactctcc cacaccctcg tcccattccc tccgtgggtc gaaacacact
2041 caattttatt gcaacctcgg gtcgatttca aatatggacg ggaaaccaag acacggagac
2101 ctgacttggc ttcaccgagt tgctggcttg gtgctcttgc agcccgtgaa taaacacatg
2161 caatccctgg cacacgcccc cccaagtgtc tcctcagagc acagactgac tgtctcactg
2221 tccagcagac agtttgttga cttttgcttt ccccagtcac cagatcccag actcctcaat
2281 tcttcctccc ttgggatgct caggctagcc tggtcggggc tgtctaggag caaagtcctt
2341 ttcagggaat caccctggta cacaacacag tttcagccag aaggttcggg cagagaccag
2401 agcgaggtag agtagttttg tttgtagaga acgtcacagc ccaatctggt ctgtgagagg
2461 tcacagatac tgagctgagg acagaacgcc aaccttatct aggtagatgg agccttctgg
2521 gtggatcttc tttagcctgg cctcacccca gctaaatgta gaccacccta gtgacaaaac
2581 agtggccagt agaacagcat acgaaggctg atttagaatg aggaggccaa agccgggcgt
2641 ggtagcgcac gccttcaatc ccagtacctg ggaggcaaag gcaggcgaat ttctaagttc
2701 gaggccagcc tggtctacag agtgagttcc aggacagcca ggactacaca gagaaaccct
2761 gtctcacaaa aaccaaaaag aaaaaagaaa aaaaaaagaa tgaggaggcc agagagaact
2821 ggcaccaagg ctcagaggag ggcaccaata catttcctct ggagccatcc atcctggcca
2881 aagtgcccac agagagctcc acaaatagca tctagcagaa ggcagagcaa tgaggacacc
2941 actgggggaa atcctgggac cagggggctg tggggagcct gggctcactg cccatcccac
3001 caaaccctga ccaggttcac cgtgtgcccg ccccaggcac actgcccatc ccaccaaaca
3061 ctgaccaggt tcaccgtgtg cccgcccggg cactcaaagc caagctctgc ctgctcccca
3121 gacccagtgt ttgtgcaggg aggtgctgcc taggaggtgg gggacagggt gtggtcctgc
3181 agctactgtt tactagctag gtgaccctgt ccaaaggact gaaccttttc cgtttacaca
3241 ttgggaagga ccccttccct ctctctctct ctctcaaaaa aaggtgttgt ggaggatgac
3301 atcattgtgg ctcctcaaca ggccagggcg tgtgtccagg gaaaagacaa ccctggtcct
3361 ctgagaaggg agccaggaga ctgagttgag gtgctcactc tgcctccctc acctcacagt
3421 tctcagatgc tgccttctcg ggtgtgacca cactgaaaca cgtccatctg gacaacaacc
3481 gcctgaacca actgccttcc tccttcccct ttgacaacct ggagaccctc actctcacca
3541 acaacccatg gaaatgcacc tgccagctcc gtggccttcg gcggtgagaa tattcctcca
3601 tataaccccc agactgccgt ccacatgaca gacggtccta gagtaggaca gcctggacat
3661 cctagtcagc tacctagcat gtcgggtact gagtggttcc cttctctcat ttgtcaaatg
3721 aagatgacaa ctccagatat ttctatggcc atagtccatc ccggtcactg tccctttccc
3781 aagccttccc acccagcttt tccaagccca gcaactcttt gtctctgtag gtggttggaa
3841 gccaaggctt ctcgaccgga tgctacctgc tcctcgccag ccaagttcaa gggtcagcgg
3901 attcgtgaca cagatgccct tcgcagctgc aaatccccga ccaagaggtc caagaaagct
3961 ggccgccatt aaacaggtgg gggctgggta gggaggccac cacggtctac ctttggaaat
4021 tccagatggg gtgctgctat atcccatgac accacttccg gaggagcaat cagttccctg
4081 tcttacaaga aaaggaggga ggacaggata acctctccca tggcttggcc taggacgtcc
4141 atgggtccct ttaatgactc tgggtgactg gaatcctaat acccatcttc tctcactata
4201 ggtcctgatc cagccagtcc tggcgactgc cttccgctgg agagactact gacgttccct
4261 cccatcatcc acaccttctc ctacagcctc tgcggatgca cagcgctgcc ccgcccccgc
4321 ccccacctag gtacatcctg gcaggggcac tgggctctct atcaccatcc cagctccacc
4381 cagtggggtc ctaggaaaga cacagaatcc ctccccagcc actgtgtctg ggctctgcca
4441 tggctccttt gagagaagct attgtagaac ctcctaccct ctgtccatcg gagctaaagc
4501 gcagtggtca ttgggatgac cacgttatta ccaccttcct cggttccctc tgtccctgcc
4561 atttggaaac aaacatcagg cccctgaccc accctgattg ccagaaagaa tttcaggccc
4621 atgccccaac tctgccagtt cctgcctgcc aggacatgct accaggatac cagtagcgct
4681 tggctgcata tccttcctgt ttgcgctcca gatttctata aacataaatg tatgtgtgtt
4741 ca
//