mirror of
https://github.com/vale981/vertiefungs_scraper
synced 2025-03-04 17:21:38 -05:00
init
This commit is contained in:
parent
403d457498
commit
c5404bc283
4 changed files with 292 additions and 0 deletions
1
events.json
Normal file
1
events.json
Normal file
File diff suppressed because one or more lines are too long
56
index.html
Normal file
56
index.html
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
<!DOCTYPE html>
<html lang="de">
  <head>
    <meta charset="utf-8">
    <title>Physik Vertiefungsbrowser</title>
    <script
      src="https://code.jquery.com/jquery-3.5.1.min.js"
      integrity="sha256-9/aliU8dGd2tb6OSsuzixeV4y/faTqgFtohetphbbj0="
      crossorigin="anonymous"></script>
    <!-- Explicit https:// so the page also works when opened from file:// -->
    <script src="https://cdn.jsdelivr.net/npm/pouchdb@7.2.1/dist/pouchdb.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/pouchdb@7.2.1/dist/pouchdb.find.min.js"></script>
    <script src="main.js"></script>
    <link rel="stylesheet" href="https://cdn.rawgit.com/Chalarangelo/mini.css/v3.0.1/dist/mini-default.min.css">

    <style>
      table {
        max-height: none !important;
      }
      th, td {
        border: 1px solid black;
        /* "show" is not a CSS value; "visible" is the keyword that lets cell content overflow */
        overflow: visible;
      }

      /* Applied by main.js to a cell that holds more than one event */
      .red {
        background-color: red;
      }
    </style>
  </head>
  <body>
    <header>
      <a href="#" class="logo">Hiro's Vertiefungsbrowser</a>
    </header>
    <div class="row">
      <div class="col-sm-3">
        <!-- main.js reads #type, #week and the .subj checkboxes it renders into #verts -->
        <form id="settings">
          <label for="type">Typ:</label>
          <!-- "selected" (not "default") is the attribute that pre-selects an option -->
          <select id="type" name="type" multiple>
            <option value="tut" selected>Uebung</option>
            <option value="lect" selected>Vorlesung</option>
          </select>
          <label for="week">Woche:</label>
          <select id="week" name="week">
            <option value="ugw" selected>ugW.</option>
            <option value="gw">gW.</option>
          </select>
          <div id="verts"></div>
        </form>
      </div>
      <!-- main.js replaces the content of #table with the rendered timetable -->
      <div class="col-sm-9" id="table">
        Einfach Links auswaehlen :).
      </div>
    </div>

    <footer>
      <p>Von Valentin Boettcher, <a href="//protagon.space">Website</a></p>
    </footer>
  </body>
</html>
|
108
main.js
Normal file
108
main.js
Normal file
|
@ -0,0 +1,108 @@
|
||||||
|
/**
 * Load the scraped event list from "events.json" (relative to the page).
 *
 * @returns {Promise<Array<Object>>} resolves with the parsed JSON payload;
 *     rejects with an Error when the request or the JSON parsing fails
 *     (previously the promise stayed pending forever on failure).
 */
function getEvents() {
    return new Promise((resolve, reject) => {
        $.getJSON("events.json")
            .done(data => resolve(data))
            .fail((jqXHR, textStatus, errorThrown) => {
                reject(new Error(`Could not load events.json: ${textStatus} ${errorThrown}`));
            });
    });
}
|
||||||
|
|
||||||
|
// Browser-local PouchDB database named "main"; filled with the events on page load.
const db = new PouchDB("main");

// Maps a specialisation name (event.vert_name) to the Set of subject names it contains.
const subjects = {};
|
||||||
|
|
||||||
|
/**
 * Render one heading plus one checkbox per subject into #verts and wire the
 * settings form so that every change re-renders the timetable.
 *
 * Reads the module-level `subjects` map, so it must run after that map has
 * been populated.
 */
function setUpForm() {
    const $verts = $('#verts');

    for (const vert of Object.keys(subjects)) {
        // Elements are built with .text()/.attr() so names coming from the
        // scraped data cannot break or inject markup.
        $verts.append($('<h4>').text(vert));

        for (const sub of subjects[vert]) {
            $verts.append(
                $('<label>').attr('for', sub).text(sub),
                $('<input>').attr({
                    class: 'subj',
                    type: 'checkbox',
                    id: sub,
                    name: sub,
                    value: sub,
                }),
                $('<br>')
            );
        }
        $verts.append($('<hr>'));
    }

    // "change" fires once per actual value change of a select or checkbox.
    // The former delegated "click" on "*" ran the handler once for every
    // matching element on the bubbling path.
    $('#settings').on('change', 'select, input', () => {
        renderTable();
    });
}
|
||||||
|
|
||||||
|
/**
 * Query the local database for the checked subjects and render the weekly
 * timetable into #table.
 *
 * Rows are the time slots 1..7 (column "DS"), columns are Mo..Fr. A cell that
 * holds more than one event gets the CSS class "red". Tutorials (type "tut")
 * are marked with "(U)".
 *
 * @returns {Promise<void>} resolves once the table has been written.
 */
async function renderTable() {
    const dayNames = ["Mo", "Di", "Mi", "Do", "Fr"];
    const slots = Array.from({length: 7}, (_, i) => i + 1);
    // Let the browser escape text that is spliced into the HTML string.
    const escapeHtml = text => $('<div>').text(text).html();

    const tbl = $('#table');
    // A Set removes duplicates in case the same subject is listed more than once.
    const checkedSubjects = Array.from(new Set(
        $('.subj:checked').map((_, box) => box.value).get()
    ));
    const types = $('#type').val() || [];
    const week = $('#week').val();

    // One query per time slot, all running concurrently.
    const results = await Promise.all(slots.map(time => db.find({
        sort: ["day"],
        selector: {
            name: {$in: checkedSubjects},
            day: {$exists: true},
            time: time,
            // Events stored with week "both" match either week setting.
            week: {$in: [week, 'both']},
            type: {$in: types},
        },
    })));

    let content = `<table><tr><th>DS</th>`;
    for (const day of dayNames)
        content += `<th>${day}</th>`;
    content += `</tr>`;

    results.forEach((result, index) => {
        content += `<tr><td>${slots[index]}</td>`;

        // Bucket this slot's events by weekday index (0 = Mo .. 4 = Fr).
        const perDay = dayNames.map(() => []);
        for (const sub of result.docs) {
            // Ignore documents whose day falls outside the table instead of crashing.
            if (perDay[sub.day])
                perDay[sub.day].push(sub);
        }

        for (const daySubs of perDay) {
            content += `<td class="${daySubs.length > 1 ? "red" : ""}">`;
            for (const sub of daySubs) {
                content += escapeHtml(sub.name);
                if (sub.type === "tut")
                    content += "(U)";
                content += ",<br>";
            }
            content += `</td>`;
        }
        content += `</tr>`;
    });
    content += `</table>`;

    tbl.html(content);
}
|
||||||
|
|
||||||
|
// Page bootstrap: load events.json, mirror it into the local database,
// create the index used for sorting by day, then build the settings form.
$(document).ready(() => {
    getEvents().then(events => {
        const puts = [];
        for (const event of events) {
            // Collect the subject names per specialisation for the form.
            if (!subjects[event.vert_name])
                subjects[event.vert_name] = new Set();
            subjects[event.vert_name].add(event.name);

            // Deterministic id, so reloading the page does not duplicate events.
            // NOTE(review): the id does not include event.type, so a lecture and a
            // tutorial with identical name/time/day/week share one id - confirm
            // this is acceptable.
            const id = event.name + event.time.toString() + event.day.toString() + event.week;
            puts.push(db.put({_id: id, ...event}).catch(err => {
                // 409 = document already exists (expected on every reload).
                // Anything else is reported but still does not abort the start-up.
                if (!err || err.status !== 409)
                    console.error("Could not store event", id, err);
                return true;
            }));
        }

        return Promise.all(puts);
    }).then(() => {
        return db.createIndex({
            index: {fields: ['day']}
        });
    }).then(() => {
        setUpForm();
    }).catch(err => {
        // Surface load/index failures instead of leaving an unhandled rejection.
        console.error("Initialisation failed", err);
    });
});
|
127
main.py
Normal file
127
main.py
Normal file
|
@ -0,0 +1,127 @@
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import requests
|
||||||
|
from uritools import urijoin
|
||||||
|
import re
|
||||||
|
from enum import Enum
|
||||||
|
import json
|
||||||
|
|
||||||
|
# Root against which relative lecture links are resolved.
base = "https://tu-dresden.de/"

# One schedule entry: weekday abbreviation, a single-digit slot number in
# parentheses and an optional week marker, e.g. "MO(3)" or "Di(2) ugW".
# Matching is case-insensitive.
time_regex = re.compile(
    r"(MO|DI|MI|DO|FR)\(([0-9])\)\s*(ugW|gw)*", re.MULTILINE | re.IGNORECASE
)

# Weekday abbreviations; the list position is the numeric day stored in events.
days = ["MO", "DI", "MI", "DO", "FR"]


class Week(str, Enum):
    """Week marker of an event.

    Subclasses ``str`` so that ``json.dumps`` emits the plain value.
    Presumably "gw"/"ugw" denote even/odd calendar weeks - confirm.
    """

    GW = "gw"
    UGW = "ugw"
    BOTH = "both"  # no marker in the source text


def parse_dates(text):
    """Extract all schedule entries from *text*.

    Returns a list of dicts with the keys ``day`` (index into ``days``),
    ``week`` (a ``Week`` member) and ``time`` (slot number as ``int``), in
    the order the entries appear. Text without any entry yields ``[]``.
    """
    parsed = []
    for day, time, week in time_regex.findall(text):
        # The regex ignores case, but `days` holds upper-case names only;
        # without normalising, "Mo(3)" raised ValueError here.
        day = days.index(day.upper())

        if not week:
            week = Week.BOTH
        elif week.lower() == "gw":
            week = Week.GW
        else:
            week = Week.UGW

        parsed.append(dict(day=day, week=week, time=int(time)))

    return parsed
|
||||||
|
|
||||||
|
|
||||||
|
def _cell_after_label(box, label):
    """Return the ``td`` following the first ``td`` whose text contains *label*, or None."""
    label_cell = box.find(lambda tag: tag.name == "td" and label in tag.text)
    if label_cell is None:
        return None
    return label_cell.find_next_sibling("td")


def parse_lecture(lect_link, name):
    """Fetch one lecture page and return its scheduled events.

    Args:
        lect_link: the ``<a>`` tag of the lecture; its ``href`` is resolved
            against ``base`` and its text becomes the event name.
        name: name of the specialisation the lecture is listed under
            (stored as ``vert_name``).

    Returns:
        A list of event dicts with the keys ``name``, ``vert_name``, ``week``,
        ``time``, ``day`` and ``type`` ("lect" or "tut"). Pages without the
        "tudbox" container or without the labelled cells contribute no events
        instead of raising ``AttributeError``/``IndexError``.

    Raises:
        requests.RequestException: if the page cannot be fetched.
    """
    # Without a timeout a stalled server would hang the whole scrape.
    response = requests.get(urijoin(base, lect_link.get("href")), timeout=30)
    response.raise_for_status()
    lect = BeautifulSoup(response.text, features="html.parser")

    box = lect.find("div", class_="tudbox")
    if box is None:
        return []

    dates = []

    time_cell = _cell_after_label(box, "Zeit/Ort:")
    if time_cell is not None:
        dates += [
            dict(
                name=lect_link.text,
                vert_name=name,
                week=date["week"],
                time=date["time"],
                day=date["day"],
                type="lect",
            )
            for date in parse_dates(time_cell.text.strip())
        ]

    tuts_cell = _cell_after_label(box, "Übungen:")
    if tuts_cell is not None:
        # The tutorial schedule is taken from the last nested cell.
        inner_cells = list(tuts_cell.findAll("td"))
        if inner_cells:
            dates += [
                dict(
                    name=lect_link.text,
                    vert_name=name,
                    # NOTE(review): only tutorial events carry the raw parsed
                    # entry under "date"; kept so the emitted JSON is unchanged.
                    date=date,
                    week=date["week"],
                    time=date["time"],
                    day=date["day"],
                    type="tut",
                )
                for date in parse_dates(inner_cells[-1].text)
            ]

    return dates
|
||||||
|
|
||||||
|
|
||||||
|
def get_lectures(vert_table, vert_name):
    """Collect the events of every lecture linked from *vert_table*.

    Each ``<a>`` tag inside the table is passed to ``parse_lecture`` together
    with *vert_name*; the resulting event lists are concatenated in link order.
    """
    events = []
    for link in vert_table.findAll("a"):
        events.extend(parse_lecture(link, vert_name))
    return events
|
||||||
|
|
||||||
|
|
||||||
|
_CATALOG_URL = (
    "https://tu-dresden.de/mn/physik/studium/lehrveranstaltungen/"
    "vertiefungsgebiete-bachelor-und-master/katalog_wintersemester"
)


def get_vert_tables(url=_CATALOG_URL):
    """Scrape the specialisation catalogue and return all events as a flat list.

    Args:
        url: catalogue page to scrape; defaults to the winter-semester
            catalogue that was previously hard-coded.

    Returns:
        The concatenated event dicts of every ``table.BodyTable`` on the page,
        as produced by ``get_lectures``.

    Raises:
        requests.RequestException: if the catalogue page cannot be fetched.
    """
    # Without a timeout a stalled server would hang the scrape indefinitely.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, features="html.parser")

    vert_tables = soup.findAll("table", class_="BodyTable")

    # The node directly before each table supplies the specialisation name.
    # NOTE(review): assumes every table has such a preceding node - confirm
    # against the live markup.
    verts = [
        lecture
        for vert in vert_tables
        for lecture in get_lectures(vert, vert.previous_sibling.text)
    ]
    return verts
|
||||||
|
|
||||||
|
|
||||||
|
def get_lectures_for_time(verts, time, tut=False, week=None):
    """Group the lectures that take place in slot *time* by weekday.

    NOTE(review): nothing in this file calls this function, and it expects a
    different shape than ``get_vert_tables`` returns - a mapping whose values
    have a "lectures" list, each lecture carrying "name", "lecture_times" and
    "tutorial_times". Confirm it is still needed.

    Args:
        verts: mapping as described above.
        time: slot number to select.
        tut: look at "tutorial_times" instead of "lecture_times".
        week: optional week marker with a ``.value``; when given, only entries
            for that week or for every week ("both") are kept.

    Returns:
        One list per entry of ``days``; each holds the matching lecture dicts,
        with at most one entry per lecture name and day.
    """
    lects = [[] for _ in days]
    lnames = [[] for _ in days]
    times_key = "tutorial_times" if tut else "lecture_times"

    for vert in verts.values():
        for lect in vert["lectures"]:
            # A missing/empty schedule simply contributes nothing.
            for l_time in lect[times_key] or ():
                if l_time["time"] != time:
                    continue

                day = l_time["day"]
                if lect["name"] in lnames[day]:
                    continue

                # Entries without a week marker ("both") happen every week, so
                # they must survive any week filter - same rule as the
                # `$in: [week, 'both']` selector in main.js.
                if week is not None and l_time["week"].value not in (
                    week.value,
                    Week.BOTH.value,
                ):
                    continue

                lects[day].append(lect)
                lnames[day].append(lect["name"])

    return lects
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Scrape the catalogue and print every event as one JSON array on stdout.
    # (The local was called `all`, which shadowed the builtin.)
    events = get_vert_tables()
    print(json.dumps(events))
|
Loading…
Add table
Reference in a new issue