mirror of
https://github.com/vale981/vertiefungs_scraper
synced 2025-03-04 09:11:39 -05:00
init
This commit is contained in:
parent
403d457498
commit
c5404bc283
4 changed files with 292 additions and 0 deletions
1
events.json
Normal file
1
events.json
Normal file
File diff suppressed because one or more lines are too long
56
index.html
Normal file
56
index.html
Normal file
|
@ -0,0 +1,56 @@
|
|||
<!DOCTYPE html>
<html lang="de">
  <head>
    <meta charset="utf-8">
    <title>Physik Vertiefungsbrowser</title>

    <!-- Third-party libraries: jQuery, PouchDB and the pouchdb-find plugin. -->
    <script
      src="https://code.jquery.com/jquery-3.5.1.min.js"
      integrity="sha256-9/aliU8dGd2tb6OSsuzixeV4y/faTqgFtohetphbbj0="
      crossorigin="anonymous"></script>
    <!-- Explicit https:// so the page also works when opened from file://. -->
    <script src="https://cdn.jsdelivr.net/npm/pouchdb@7.2.1/dist/pouchdb.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/pouchdb@7.2.1/dist/pouchdb.find.min.js"></script>
    <!-- Application code; it waits for document-ready before touching the DOM. -->
    <script src="main.js"></script>
    <link rel="stylesheet" href="https://cdn.rawgit.com/Chalarangelo/mini.css/v3.0.1/dist/mini-default.min.css">

    <style>
      table {
        max-height: none !important;
      }
      th, td {
        border: 1px solid black;
        /* "show" is not a valid overflow value; "visible" is the intended keyword. */
        overflow: visible;
      }

      /* Applied by main.js to table cells that hold more than one event. */
      .red {
        background-color: red;
      }
    </style>
  </head>
  <body>
    <header>
      <a href="#" class="logo">Hiro's Vertiefungsbrowser</a>
    </header>
    <div class="row">
      <div class="col-sm-3">
        <!-- Filter form; main.js re-renders the table whenever it changes. -->
        <form id="settings">
          <label for="type">Typ:</label>
          <!-- "selected" (not the non-existent "default" attribute) preselects an option. -->
          <select id="type" name="type" multiple>
            <option value="tut" selected>Uebung</option>
            <option value="lect" selected>Vorlesung</option>
          </select>
          <label for="week">Woche:</label>
          <select id="week" name="week">
            <option value="ugw" selected>ugW.</option>
            <option value="gw">gW.</option>
          </select>
          <!-- Filled by main.js with one checkbox per subject. -->
          <div id="verts"></div>
        </form>
      </div>
      <!-- Placeholder text; replaced by the generated timetable. -->
      <div class="col-sm-9" id="table">
        Einfach Links auswaehlen :).
      </div>
    </div>

    <footer>
      <p>Von Valentin Boettcher, <a href="//protagon.space">Website</a></p>
    </footer>
  </body>
</html>
|
108
main.js
Normal file
108
main.js
Normal file
|
@ -0,0 +1,108 @@
|
|||
/**
 * Fetch the event list from `events.json` (relative to the page).
 *
 * @returns {Promise<*>} Resolves with the parsed JSON payload. Rejects with an
 *   Error when the request or the JSON parsing fails, so callers are not left
 *   with a promise that never settles.
 */
function getEvents() {
    return new Promise((resolve, reject) => {
        $.getJSON("events.json")
            .done(data => resolve(data))
            .fail((jqXHR, textStatus, errorThrown) => {
                reject(new Error(`Could not load events.json: ${errorThrown || textStatus}`));
            });
    });
}
|
||||
|
||||
// Browser-local PouchDB database named "main"; it stores one document per event.
const db = new PouchDB("main");
// Maps a `vert_name` to the Set of event names seen for it; filled once the
// events have been loaded and read by setUpForm().
const subjects = {};
|
||||
|
||||
/**
 * Populate `#verts` with a heading per entry of `subjects` and one labelled
 * checkbox per subject name, then re-render the table whenever the settings
 * form changes.
 *
 * Elements are built through the DOM API instead of HTML strings so that
 * names containing `<`, `&` or quotes cannot break the markup.
 */
function setUpForm() {
    const container = $('#verts');

    for (const vert in subjects) {
        container.append($('<h4>').text(vert));

        for (const sub of subjects[vert]) {
            container.append(
                $('<label>').attr('for', sub).text(sub),
                ' ',
                $('<input>').attr({
                    'class': 'subj',
                    type: 'checkbox',
                    id: sub,
                    name: sub,
                    value: sub,
                }),
                $('<br>')
            );
        }

        container.append($('<hr>'));
    }

    // `change` bubbles up from the checkboxes and both <select>s and fires
    // once per actual modification. The former delegated `click` on '*' ran
    // the handler once for every matching element on the bubbling path.
    $('#settings').on('change', () => {
        renderTable().catch(err => console.error(err));
    });
}
|
||||
|
||||
/**
 * Query the local database for the currently selected subjects, event types
 * and week, and render the result as a table into `#table`.
 *
 * One row is produced per time slot 1..7 and one column per weekday (Mo-Fr).
 * A cell holding more than one event gets the CSS class `red`.
 *
 * @returns {Promise<void>} Settles once the table has been written.
 */
async function renderTable() {
    // Escape text before it is concatenated into the HTML string below.
    const escapeHtml = text => $('<div>').text(text).html();

    const checkedSubjects = new Set();
    for (const box of $('.subj'))
        if ($(box).prop('checked'))
            checkedSubjects.add(box.value);

    const types = $('#type').val();
    const week = $('#week').val();
    const names = Array.from(checkedSubjects);

    // One query per time slot; all of them run concurrently.
    const queries = [];
    for (let time = 1; time <= 7; time++) {
        queries.push(db.find({
            sort: ["day"],
            selector: {
                name: {$in: names},
                day: {$exists: true},
                time: time,
                // Events stored with week 'both' match either week choice.
                week: {$in: [week, 'both']},
                type: {$in: types},
            },
        }));
    }

    const results = await Promise.all(queries);

    let content = `<table><tr><th>DS</th>`;
    for (const day of ["Mo", "Di", "Mi", "Do", "Fr"])
        content += `<th>${day}</th>`;
    content += `</tr>`;

    results.forEach((result, slot) => {
        content += `<tr><td>${slot + 1}</td>`;

        // Bucket the events of this slot by weekday index (0 = Mo .. 4 = Fr).
        const byDay = Array.from({length: 5}, () => []);
        for (const sub of result.docs) {
            const bucket = byDay[sub.day];
            // Ignore documents whose day index falls outside Mo-Fr.
            if (bucket)
                bucket.push(sub);
        }

        for (const day of byDay) {
            content += `<td class="${day.length > 1 ? "red" : ""}">`;
            for (const sub of day) {
                content += escapeHtml(sub.name);
                if (sub.type === "tut")
                    content += "(U)";
                content += ",<br>";
            }
            content += `</td>`;
        }
        content += `</tr>`;
    });

    content += `</table>`;
    $('#table').html(content);
}
|
||||
|
||||
// Bootstrap: load the events, mirror them into the local database, make sure
// the index used for sorting exists, then build the settings form.
$(document).ready(() => {
    getEvents().then(events => {
        const puts = [];

        for (const event of events) {
            // Group the subject names by their `vert_name`.
            if (!subjects[event.vert_name])
                subjects[event.vert_name] = new Set();
            subjects[event.vert_name].add(event.name);

            // Failures of individual puts are deliberately ignored
            // (best-effort), e.g. when a document with this _id already
            // exists from an earlier page load.
            puts.push(db.put({
                _id: event.name + event.time.toString() + event.day.toString() + event.week,
                ...event,
            }).catch(() => true));
        }

        return Promise.all(puts);
    }).then(() => {
        // renderTable() sorts by `day`, which needs an index on that field.
        return db.createIndex({
            index: {fields: ['day']}
        });
    }).then(() => {
        setUpForm();
    }).catch(err => {
        // Surface load or index failures instead of leaving an unhandled rejection.
        console.error(err);
    });
});
|
127
main.py
Normal file
127
main.py
Normal file
|
@ -0,0 +1,127 @@
|
|||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
from uritools import urijoin
|
||||
import re
|
||||
from enum import Enum
|
||||
import json
|
||||
|
||||
# Root URL against which the lecture links' hrefs are resolved.
base = "https://tu-dresden.de/"

# Matches one timetable slot such as "MO(3) ugW".
# Groups: weekday abbreviation, single-digit slot number, week marker.
# The week marker group is empty when no marker follows the slot.
time_regex = re.compile(
    r"(MO|DI|MI|DO|FR)\(([0-9])\)\s*(ugW|gw)*",
    flags=re.IGNORECASE | re.MULTILINE,
)

# Weekday abbreviations; the list position is used as the numeric day index.
days = ["MO", "DI", "MI", "DO", "FR"]
|
||||
|
||||
|
||||
class Week(str, Enum):
    """Week marker attached to a parsed timetable slot.

    The ``str`` mix-in makes every member a real string, so a member compares
    equal to its plain value.
    """

    # Slot carries the "gw" marker.
    GW = "gw"
    # Slot carries the "ugW" marker.
    UGW = "ugw"
    # Slot carries no week marker at all.
    BOTH = "both"
|
||||
|
||||
|
||||
def parse_dates(text):
    """Extract every timetable slot that ``time_regex`` finds in *text*.

    Args:
        text: Free text containing slots such as ``"MO(3) ugW"``.

    Returns:
        A list of dicts with the keys ``day`` (index into ``days``), ``week``
        (a ``Week`` member) and ``time`` (slot number as ``int``), in the
        order the slots appear in *text*.
    """
    parsed = []
    for day, time, week in time_regex.findall(text):
        if not week:
            # No marker after the slot.
            rhythm = Week.BOTH
        elif week.lower() == "gw":
            rhythm = Week.GW
        else:
            rhythm = Week.UGW

        # The pattern is case-insensitive but ``days`` holds upper-case
        # abbreviations, so normalise before the lookup ("Mo" would otherwise
        # raise ValueError).
        parsed.append(dict(day=days.index(day.upper()), week=rhythm, time=int(time)))

    return parsed
|
||||
|
||||
|
||||
def _cell_after_label(box, label):
    """Return the ``<td>`` following the first ``<td>`` containing *label*, or ``None``."""
    label_cell = box.find(lambda tag: tag.name == "td" and label in tag.text)
    if label_cell is None:
        return None
    return label_cell.find_next_sibling("td")


def parse_lecture(lect_link, name):
    """Download one lecture page and return its lecture and tutorial events.

    Args:
        lect_link: The ``<a>`` tag of the lecture; its ``href`` is resolved
            against ``base`` and its text becomes the event name.
        name: Stored as ``vert_name`` on every returned event.

    Returns:
        A list of event dicts with the keys ``name``, ``vert_name``, ``week``,
        ``time``, ``day`` and ``type`` (``"lect"`` or ``"tut"``). Parts of the
        page that are missing (the ``tudbox`` div, the "Zeit/Ort:" row or the
        "Übungen:" row) simply contribute no events.
    """
    # A timeout keeps one stalled request from hanging the whole scrape.
    lect_html = requests.get(
        urijoin(base, lect_link.get("href")), timeout=30
    ).text
    lect = BeautifulSoup(lect_html, features="html.parser")

    box = lect.find("div", class_="tudbox")
    if box is None:
        return []

    title = lect_link.text
    dates = []

    times_cell = _cell_after_label(box, "Zeit/Ort:")
    if times_cell is not None:
        dates += [
            dict(
                name=title,
                vert_name=name,
                week=date["week"],
                time=date["time"],
                day=date["day"],
                type="lect",
            )
            for date in parse_dates(times_cell.text.strip())
        ]

    tuts_cell = _cell_after_label(box, "Übungen:")
    if tuts_cell is not None:
        # The tutorial slots are read from the last nested <td> of that cell.
        nested_cells = tuts_cell.findAll("td")
        if nested_cells:
            dates += [
                dict(
                    name=title,
                    vert_name=name,
                    # NOTE(review): only tutorial events carry the raw parsed
                    # slot under "date"; kept so the emitted JSON is unchanged.
                    date=date,
                    week=date["week"],
                    time=date["time"],
                    day=date["day"],
                    type="tut",
                )
                for date in parse_dates(nested_cells[-1].text)
            ]

    return dates
|
||||
|
||||
|
||||
def get_lectures(vert_table, vert_name):
    """Collect the events of every lecture linked from *vert_table*.

    Args:
        vert_table: Parsed table element; each ``<a>`` inside it is passed to
            ``parse_lecture``.
        vert_name: Forwarded to ``parse_lecture`` for every link.

    Returns:
        One flat list with the events of all links, in document order.
    """
    events = []
    for link in vert_table.findAll("a"):
        events.extend(parse_lecture(link, vert_name))
    return events
|
||||
|
||||
|
||||
def get_vert_tables(
    url="https://tu-dresden.de/mn/physik/studium/lehrveranstaltungen/vertiefungsgebiete-bachelor-und-master/katalog_wintersemester",
    timeout=30,
):
    """Scrape the catalogue page and return the events of all listed lectures.

    Args:
        url: Catalogue page to scrape; defaults to the winter-semester
            catalogue that was previously hard-coded.
        timeout: Seconds to wait for the catalogue request before giving up.

    Returns:
        A flat list of event dicts as produced by ``get_lectures``, covering
        every ``table.BodyTable`` on the page.
    """
    # A timeout keeps a stalled request from hanging the scrape indefinitely.
    vert_html = requests.get(url, timeout=timeout).text
    soup = BeautifulSoup(vert_html, features="html.parser")

    vert_tables = soup.findAll("table", class_="BodyTable")

    # The node directly before each table supplies the text used as vert_name.
    verts = [
        lecture
        for vert in vert_tables
        for lecture in get_lectures(vert, vert.previous_sibling.text)
    ]
    return verts
|
||||
|
||||
|
||||
def get_lectures_for_time(verts, time, tut=False, week=None):
    """Group the lectures that occupy slot *time* by weekday.

    Args:
        verts: Mapping whose values are dicts with a ``"lectures"`` list; each
            lecture is a dict with ``"name"`` plus ``"lecture_times"`` and
            ``"tutorial_times"`` (lists of dicts with ``"time"``, ``"day"``
            and ``"week"``).
            NOTE(review): ``get_vert_tables`` returns a flat list, not this
            shape; confirm where this structure comes from.
        time: Slot number to select.
        tut: Read ``"tutorial_times"`` instead of ``"lecture_times"``.
        week: If given, only entries whose ``week.value`` equals
            ``week.value`` are kept.

    Returns:
        A list with one sub-list per entry of ``days``; each sub-list holds
        the matching lecture dicts, at most once per lecture name and day.
    """
    times_key = "tutorial_times" if tut else "lecture_times"
    by_day = [[] for _ in days]
    seen_names = [[] for _ in days]

    for vert in verts.values():
        for lect in vert["lectures"]:
            # A missing or empty time list contributes nothing.
            for slot in lect[times_key] or ():
                if slot["time"] != time:
                    continue

                day = slot["day"]
                if lect["name"] in seen_names[day]:
                    continue

                if week is not None and slot["week"].value != week.value:
                    continue

                by_day[day].append(lect)
                seen_names[day].append(lect["name"])

    return by_day
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Scrape the catalogue and print all events as one JSON document on stdout.
    # The result is called `events` rather than `all` so the builtin is not shadowed.
    events = get_vert_tables()
    print(json.dumps(events))
|
Loading…
Add table
Reference in a new issue