-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathBD_assignment.R
More file actions
78 lines (66 loc) · 2.39 KB
/
BD_assignment.R
File metadata and controls
78 lines (66 loc) · 2.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
############################################################
# Project: Biological Databases – Multi-Source Integration
# Student ID: B269797
# Description:
# Integrate gene, protein, pathway, and miRNA data from
# Ensembl, UniProt, KEGG, and miRBase using R and MySQL.
############################################################
# ---- Load Packages ----
library(DBI)
library(RMySQL)
library(biomaRt)
library(httr)
library(jsonlite)
library(clusterProfiler)
library(org.Hs.eg.db)
library(dplyr)
# ---- Connect to MySQL ----
# Open the MySQL connection used by every dbWriteTable()/dbExecute() call in
# this script. Connection settings are read from the environment variables
# MYSQL_USER, MYSQL_PASSWORD, MYSQL_DBNAME and MYSQL_HOST so that real
# credentials do not have to be written into the script; when a variable is
# unset, the original placeholder/default value is used instead.
con <- dbConnect(
  MySQL(),
  user = Sys.getenv("MYSQL_USER", unset = "your_username"),
  password = Sys.getenv("MYSQL_PASSWORD", unset = "your_password"),
  dbname = Sys.getenv("MYSQL_DBNAME", unset = "bioinfo_db"),
  host = Sys.getenv("MYSQL_HOST", unset = "localhost")
)
# ---- Gene List ----
# Ensembl gene IDs to annotate. All five share the "ENSMUSG" prefix, so the
# vector is assembled from that prefix plus each ID's 11-digit numeric part.
genes <- paste0(
  "ENSMUSG",
  c(
    "00000036061",
    "00000000555",
    "00000023055",
    "00000075394",
    "00000001655"
  )
)
# ---- Ensembl: Gene Annotations ----
# Connect to the Ensembl "genes" BioMart using the mmusculus_gene_ensembl
# dataset, fetch the annotation attributes for every ID in `genes`, and append
# the resulting rows to the `gene_annotations` table.
ensembl <- useEnsembl(dataset = "mmusculus_gene_ensembl", biomart = "genes")
gene_df <- getBM(
  mart = ensembl,
  filters = "ensembl_gene_id",
  values = genes,
  attributes = c(
    "ensembl_gene_id", "external_gene_name", "description", "gene_biotype",
    "chromosome_name", "start_position", "end_position", "strand",
    "uniprotswissprot", "entrezgene_id"
  )
)
dbWriteTable(
  conn = con,
  name = "gene_annotations",
  value = gene_df,
  row.names = FALSE,
  append = TRUE
)
# ---- UniProt: Protein Info ----
#' Fetch one UniProtKB entry as parsed JSON.
#'
#' Requests `https://rest.uniprot.org/uniprotkb/<uid>.json` and parses the
#' response body with `fromJSON()`.
#'
#' @param uid A single UniProt accession (e.g. "P10629").
#' @return The parsed JSON when the server answers with HTTP 200; `NULL` for
#'   any other status, or when `uid` is empty, `NA`, or zero-length.
#' @details Raises an error if more than one accession is supplied, because
#'   the function builds exactly one request URL.
fetch_protein <- function(uid) {
  if (length(uid) > 1L) {
    stop("`uid` must be a single UniProt accession.", call. = FALSE)
  }
  # A missing or empty accession cannot identify an entry; skip the request.
  if (length(uid) == 0L || is.na(uid) || !nzchar(uid)) {
    return(NULL)
  }

  # Percent-encode the accession so stray characters cannot alter the URL path.
  url <- paste0(
    "https://rest.uniprot.org/uniprotkb/",
    utils::URLencode(as.character(uid), reserved = TRUE),
    ".json"
  )
  r <- GET(url)
  if (status_code(r) != 200) {
    return(NULL)
  }

  # State the encoding explicitly instead of relying on httr's guess.
  fromJSON(content(r, as = "text", encoding = "UTF-8"))
}
# Example:
# prot <- fetch_protein("P10629")
# ---- KEGG: Pathway Enrichment ----
# Run KEGG pathway enrichment (organism "mmu") on the distinct, non-missing
# Entrez IDs returned by the Ensembl query, then append the result rows to the
# `kegg_data` table.
entrez <- unique(na.omit(gene_df$entrezgene_id))
kegg_enrich <- enrichKEGG(
  gene = entrez,
  organism = "mmu",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.2
)
kegg_df <- as.data.frame(kegg_enrich)

# enrichKEGG() can return NULL (e.g. when none of the input genes can be
# mapped); as.data.frame(NULL) is then a data frame with no columns, which
# cannot be written as a table. Skip the write in that case only.
if (ncol(kegg_df) > 0L) {
  dbWriteTable(con, "kegg_data", kegg_df, append = TRUE, row.names = FALSE)
} else {
  message("KEGG enrichment returned no result; 'kegg_data' was not written.")
}
# ---- miRBase: miRNA Mapping ----
# Look up the miRBase ID and accession recorded in Ensembl BioMart for gene
# ENSMUSG00000076010, then append the rows to the `mirna_data` table.
mir_df <- getBM(
  mart = ensembl,
  filters = "ensembl_gene_id",
  values = "ENSMUSG00000076010",
  attributes = c(
    "ensembl_gene_id",
    "external_gene_name",
    "mirbase_id",
    "mirbase_accession"
  )
)
dbWriteTable(
  conn = con,
  name = "mirna_data",
  value = mir_df,
  row.names = FALSE,
  append = TRUE
)
# ---- Create SQL Summary Table ----
# `SOURCE <file>` is a command of the mysql command-line client, not a SQL
# statement, so the server rejects it when it is sent through dbExecute().
# Read the schema file (path is relative to the working directory) and run
# its statements one at a time instead.
# NOTE(review): statements are split on ";", which is sufficient for plain
# DDL/DML but not for semicolons inside string literals or for
# stored-routine bodies that use DELIMITER -- confirm against the schema file.
schema_lines <- readLines("B269797_schema.sql", warn = FALSE)
# Drop full-line SQL comments so a ";" inside a comment cannot split a statement.
schema_lines <- schema_lines[!grepl("^\\s*(--(\\s|$)|#)", schema_lines)]
schema_statements <- trimws(
  strsplit(paste(schema_lines, collapse = "\n"), ";", fixed = TRUE)[[1]]
)
for (schema_statement in schema_statements[nzchar(schema_statements)]) {
  dbExecute(con, schema_statement)
}
# ---- Disconnect ----
# Close the MySQL connection opened at the top of the script.
DBI::dbDisconnect(conn = con)
############################################################
# End of Script
############################################################