Rules99 committed on
Commit
66ab79e
·
1 Parent(s): c33f87e
Files changed (1) hide show
  1. app.py +60 -60
app.py CHANGED
@@ -1,12 +1,12 @@
1
  import numpy as np
2
  import pickle
3
  import pandas as pd
4
- import requests
5
- from selenium import webdriver
6
  import matplotlib.pyplot as plt
7
  #Simple assignment
8
- from selenium.webdriver import Firefox
9
- from selenium.webdriver.common.keys import Keys
10
  from selenium.common.exceptions import NoSuchElementException
11
  import requests
12
  import os
@@ -20,73 +20,73 @@ import streamlit as st
20
  ### Scrap the cosmic id information
21
  # ### FRAMEWORKS NEEDED
22
 
23
- def scrap():
24
- #### Setting options to the driver
25
- options = webdriver.FirefoxOptions()
26
- options.add_argument('--headless')
27
- options.add_argument('--no-sandbox')
28
- options.add_argument('--disable-dev-shm-usage')
29
- options.capabilities
30
- ### Setting options of webdriver
31
- # a) Setting the chromedriver
32
- browser = Firefox(options=options,executable_path=r"C:\Users\Pablo\OneDrive\Documents\Documentos\Escuela Politécnica Superior Leganés\4 AÑO\ASIGNATURAS\1 CUATRI\WEB ANALYTICS\PART 2\Milestone3\geckodriver.exe")
33
- ### Functions and execution to run the scrapping
34
-
35
-
36
- def getinfofromtable(oddrows:list,score:float,headertable)->list:
37
- rows = []
38
- for row in oddrows:
39
- cols = []
40
- for (i,col) in enumerate(row.find_elements_by_css_selector("td")):
41
- if i==headertable.index( 'Primary Tissue') or i==headertable.index('Primary Histology') or i==headertable.index('Zygosity'):
42
- cols.append(col.text)
43
- cols.append(score)
44
- rows.append(cols)
45
- return rows
46
- def getinfocosmic(mutationid):
47
- import time
48
- search = browser.find_element_by_id('search-field')
49
- search = search.find_element_by_class_name("text_def")
50
- search.send_keys(mutationid)
51
- search.send_keys(Keys.RETURN)
52
- time.sleep(5)
53
- try:
54
- container = browser.find_element_by_id("section-list")
55
 
56
- except NoSuchElementException:
57
- return []
58
 
59
- try:
60
 
61
- subq1 = container.text[container.text.find("score")+len("score"):]
62
- score = float(subq1[:subq1.find(")")].strip())
63
- except ValueError:
64
- score = 0
65
 
66
 
67
 
68
- section = browser.find_element_by_id("DataTables_Table_0")
69
 
70
 
71
- headertable = [header.text for header in section.find_element_by_tag_name("thead").find_elements_by_tag_name("th")]
72
 
73
- oddrows = section.find_elements_by_class_name("odd")
74
- evenrows = section.find_elements_by_class_name("even")
75
 
76
- l1 = getinfofromtable(oddrows,score,headertable)
77
- l1.extend(getinfofromtable(evenrows,score,headertable))
78
 
79
- # browser.close()
80
- return l1
81
- ## Looking for cosmic id info
82
- cosl = []
83
- browser.get("https://cancer.sanger.ac.uk/cosmic")
84
- for cos in cosmicinfo.reset_index()["COSMIC_ID"].iloc[20:]:
85
- if cos.find(",")!=-1:
86
- cos = cos.split(",")[0]
87
-
88
- cosl.append(getinfocosmic(cos))
89
- browser.get("https://cancer.sanger.ac.uk/cosmic")
90
  ### Pieplots
91
  def pieplot(merging,id=0):
92
  genecount = merging.groupby(by=["gene_name","UV_exposure_tissue","sampleID"]).count().reset_index()
 
1
  import numpy as np
2
  import pickle
3
  import pandas as pd
4
+ # import requests
5
+ # from selenium import webdriver
6
  import matplotlib.pyplot as plt
7
  #Simple assignment
8
+ # from selenium.webdriver import Firefox
9
+ # from selenium.webdriver.common.keys import Keys
10
  from selenium.common.exceptions import NoSuchElementException
11
  import requests
12
  import os
 
20
  ### Scrap the cosmic id information
21
  # ### FRAMEWORKS NEEDED
22
 
23
+ # def scrap():
24
+ # #### Setting options to the driver
25
+ # options = webdriver.FirefoxOptions()
26
+ # options.add_argument('--headless')
27
+ # options.add_argument('--no-sandbox')
28
+ # options.add_argument('--disable-dev-shm-usage')
29
+ # options.capabilities
30
+ # ### Setting options of webdriver
31
+ # # a) Setting the chromedriver
32
+ # browser = Firefox(options=options,executable_path=r"C:\Users\Pablo\OneDrive\Documents\Documentos\Escuela Politécnica Superior Leganés\4 AÑO\ASIGNATURAS\1 CUATRI\WEB ANALYTICS\PART 2\Milestone3\geckodriver.exe")
33
+ # ### Functions and execution to run the scrapping
34
+
35
+
36
+ # def getinfofromtable(oddrows:list,score:float,headertable)->list:
37
+ # rows = []
38
+ # for row in oddrows:
39
+ # cols = []
40
+ # for (i,col) in enumerate(row.find_elements_by_css_selector("td")):
41
+ # if i==headertable.index( 'Primary Tissue') or i==headertable.index('Primary Histology') or i==headertable.index('Zygosity'):
42
+ # cols.append(col.text)
43
+ # cols.append(score)
44
+ # rows.append(cols)
45
+ # return rows
46
+ # def getinfocosmic(mutationid):
47
+ # import time
48
+ # search = browser.find_element_by_id('search-field')
49
+ # search = search.find_element_by_class_name("text_def")
50
+ # search.send_keys(mutationid)
51
+ # search.send_keys(Keys.RETURN)
52
+ # time.sleep(5)
53
+ # try:
54
+ # container = browser.find_element_by_id("section-list")
55
 
56
+ # except NoSuchElementException:
57
+ # return []
58
 
59
+ # try:
60
 
61
+ # subq1 = container.text[container.text.find("score")+len("score"):]
62
+ # score = float(subq1[:subq1.find(")")].strip())
63
+ # except ValueError:
64
+ # score = 0
65
 
66
 
67
 
68
+ # section = browser.find_element_by_id("DataTables_Table_0")
69
 
70
 
71
+ # headertable = [header.text for header in section.find_element_by_tag_name("thead").find_elements_by_tag_name("th")]
72
 
73
+ # oddrows = section.find_elements_by_class_name("odd")
74
+ # evenrows = section.find_elements_by_class_name("even")
75
 
76
+ # l1 = getinfofromtable(oddrows,score,headertable)
77
+ # l1.extend(getinfofromtable(evenrows,score,headertable))
78
 
79
+ # # browser.close()
80
+ # return l1
81
+ # ## Looking for cosmic id info
82
+ # cosl = []
83
+ # browser.get("https://cancer.sanger.ac.uk/cosmic")
84
+ # for cos in cosmicinfo.reset_index()["COSMIC_ID"].iloc[20:]:
85
+ # if cos.find(",")!=-1:
86
+ # cos = cos.split(",")[0]
87
+
88
+ # cosl.append(getinfocosmic(cos))
89
+ # browser.get("https://cancer.sanger.ac.uk/cosmic")
90
  ### Pieplots
91
  def pieplot(merging,id=0):
92
  genecount = merging.groupby(by=["gene_name","UV_exposure_tissue","sampleID"]).count().reset_index()