diff --git a/README.md b/README.md index 3bec1f2..258a706 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,18 @@ Run the code with the following: ```bash +# python3 main.py {proto}://{site} python3 main.py https://www.nintendo.com ``` -Any site that does not serve HTTP/2 content will return a 301 HTTP response code and fail out of the program. -This can be seen by plugging in `https://www.rit.edu` into the input. +Any site that only serves HTTP/2 content will return a 301 HTTP response code. The 301 attempts to redirect you to HTTP/2 on the same address and port. -No additional dependencies are required. + +Any status code other than 200 will be reported back to the user without any additional processing. +The output of the program will contain the number of unique external resources. +If two files are referenced from `abc.com`, they will only count as one external reference. +Any reference that shares the same base URL will count once. +A full list of references will also be printed out regardless of uniqueness. + +No additional dependencies are required to run the program. diff --git a/main.py b/main.py index 066028b..d0cc798 100644 --- a/main.py +++ b/main.py @@ -18,7 +18,7 @@ def generate_request(): global USER_AGENT global REQUEST USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:95.0) Gecko/20100101 Firefox/95.0" - REQUEST = "GET / HTTP/1.1\r\nHost: %s\r\nConnection: close\r\nUser-Agent: %s\r\n\r\n" % (HOST_REQUEST, USER_AGENT) + REQUEST = "GET / HTTP/1.1\r\nHost: %s\r\nConnection: close\r\nAccept-Encoding: Identity\r\nUser-Agent: %s\r\n\r\n" % (HOST_REQUEST, USER_AGENT) return def pull_external_url(text): @@ -187,7 +187,7 @@ def main(): # Obtain data from response fr = "" while True: - r = w.recv(8192) + r = w.recv(32768) if not r: break fr += r.decode()