inf04-web2/entrypoint.py

60 lines
2.3 KiB
Python
Raw Normal View History

2024-06-20 19:10:59 +00:00
from Bio import SeqIO
from sqlalchemy.orm import Session
from app.database import engine, SessionLocal, Base
from app.models import Sequence, Feature
import json
# Create the database tables
Base.metadata.create_all(bind=engine)
def get_db():
    """Yield a new ``SessionLocal`` session and close it afterwards.

    This is a generator: the session is created on the first ``next()``
    and ``close()`` is called on it when the generator is resumed,
    closed, or has an exception thrown into it.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
# Feature types that are persisted; every other feature type is skipped.
_SUPPORTED_FEATURE_TYPES = frozenset({
    "promoter",
    "RBS",
    "CDS",
    "protein_bind",
    "misc_feature",
    "rep_origin",
    "terminator",
})

# Placeholder Biopython assigns to SeqRecord.description when none is given.
_UNKNOWN_DESCRIPTION = "<unknown description>"


def _feature_qualifiers(feature):
    """Return the dict of qualifiers stored for a single GenBank feature.

    Only a few qualifiers are kept, chosen by feature type; types without
    a dedicated rule (promoter, RBS, terminator) yield an empty dict.
    """
    source = feature.qualifiers
    if feature.type == "CDS":
        return {
            'gene_name': source.get('gene', ['Unknown gene'])[0],
            'product': source.get('product', ['Unknown product'])[0],
        }
    if feature.type == "protein_bind":
        return {
            'binding_site': source.get(
                'bound_moiety', ['Unknown binding site']
            )[0],
        }
    if feature.type == "misc_feature":
        return {
            'note': source.get('note', ['No additional information'])[0],
        }
    if feature.type == "rep_origin":
        return {'note': "This is a replication origin."}
    return {}


def load_genbank_to_db(file_path: str) -> None:
    """Load a single-record GenBank file into the database.

    The record is stored as one ``Sequence`` row, and each feature whose
    type is in ``_SUPPORTED_FEATURE_TYPES`` is stored as a ``Feature`` row
    that references it.

    Args:
        file_path: Path to a GenBank file; it is parsed with
            ``SeqIO.read``, which expects exactly one record.
    """
    # Drive the get_db() generator explicitly so that closing it in the
    # ``finally`` below runs its cleanup and closes the session. A bare
    # ``next(get_db())`` leaves the session open until garbage collection.
    db_gen = get_db()
    db = next(db_gen)
    try:
        with open(file_path, 'r') as file:
            record = SeqIO.read(file, "genbank")

        # The description lives on the record itself, not in
        # ``record.annotations``; fall back only when it is really missing.
        description = record.description
        if not description or description == _UNKNOWN_DESCRIPTION:
            description = 'No description'

        # Save the sequence to the database.
        sequence_data = Sequence(
            name=record.name,
            description=description,
            sequence=str(record.seq),
        )
        db.add(sequence_data)
        db.commit()
        db.refresh(sequence_data)  # Refresh to obtain the generated ID.

        # Save the supported features to the database.
        for feature in record.features:
            if feature.type not in _SUPPORTED_FEATURE_TYPES:
                continue
            location = feature.location
            feature_data = Feature(
                type=feature.type,
                location=str(location),
                # Plain start:end slice of the record sequence.
                # NOTE(review): strand and compound (joined) locations are
                # not taken into account here - confirm this is intended.
                sequence=str(record.seq[location.start:location.end]),
                qualifiers=json.dumps(_feature_qualifiers(feature)),
                sequence_id=sequence_data.id,  # Link to the parent sequence.
            )
            db.add(feature_data)
        db.commit()
        print(f"Loaded {file_path} to database.")
    finally:
        # Triggers the ``finally`` clause inside get_db().
        db_gen.close()
if __name__ == "__main__":
    # When run as a script, load the GenBank file at data/plasmid.gb.
    load_genbank_to_db("data/plasmid.gb")
2024-06-20 19:10:59 +00:00