Compare commits

...

2 Commits

Author SHA1 Message Date
Pin
47a4850f00 small doc changes 2022-01-23 17:09:00 -05:00
Pin
f1e76d8d41 Added README 2022-01-13 21:47:44 -05:00
2 changed files with 28 additions and 3 deletions

View File

@@ -1 +1,20 @@
# Please use this read me file for the information required to be submitted per the assignment on MyCourses.
# HTTP JavaScript Scraper
Run the code with the following:
```bash
# python3 main.py {proto}://{site}
python3 main.py https://www.nintendo.com
```
Any site that only serves HTTP/2 content will return a 301 HTTP response code.
The 301 is the server's attempt to redirect you to HTTP/2 on the same address and port.
Any status code other than 200 will be reported back to the user without any additional processing.
The output of the program will contain the number of unique external resources.
If two files are referenced from `abc.com`, they will only count as one external reference.
Any reference that shares the same base URL will count once.
A full list of references will also be printed out, regardless of uniqueness.
No additional dependencies are required to run the program.

10
main.py
View File

@@ -18,7 +18,7 @@ def generate_request():
global USER_AGENT
global REQUEST
USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:95.0) Gecko/20100101 Firefox/95.0"
REQUEST = "GET / HTTP/1.1\r\nHost: %s\r\nConnection: close\r\nUser-Agent: %s\r\n\r\n" % (HOST_REQUEST, USER_AGENT)
REQUEST = "GET / HTTP/1.1\r\nHost: %s\r\nConnection: close\r\nAccept-Encoding: Identity\r\nUser-Agent: %s\r\n\r\n" % (HOST_REQUEST, USER_AGENT)
return
def pull_external_url(text):
@@ -159,6 +159,12 @@ def conn_type_parse():
return conn_port
def check_connection_succ(text):
"""
Exits program if any HTTP response code other than 200 is received
:param str text: full HTTP response
"""
# If the website is using HTTP/2 and has no HTTP/1.1 options
# you will receive an error here since it returns a 301 code
if text.split("\n")[0].find("200") == -1:
print("Error on HTTP request")
print("HTTP Return Code: %s" % text.split("\n")[0])
@@ -181,7 +187,7 @@ def main():
# Obtain data from response
fr = ""
while True:
r = w.recv(8192)
r = w.recv(32768)
if not r:
break
fr += r.decode()