# main.py -- reconstructed from the "Initial commit" patch (67d4f5a) that adds
# this file; the diff envelope and leading "+" markers have been dropped.
"""
List the external JavaScript references found on a site's root page.

The URL is taken from the first command line argument. A raw HTTP/1.1
"GET /" request is sent over a plain or TLS socket, the response is scanned
for quoted strings containing ".js", and those that contain "//" are
reported as external references.
"""
import re      # Used to locate quoted strings in the response
import socket  # Used for HTTP(s) socket connection
import ssl     # Used for HTTP(s) socket connection
import sys     # Used for system arguments

# Global Variables
# HOST_REQUEST - Used to define what host is being connected to
# PORT_REQUEST - Used to define what port should be connected to
# USER_AGENT - Used to spoof the requesting user agent on the HTTP request
# REQUEST - Used to define the HTTP request sent to the remote host
#
# They are real module-level placeholders (a bare module-level "global"
# statement defines nothing) and are filled in by main()/generate_request().
HOST_REQUEST = None
PORT_REQUEST = None
USER_AGENT = None
REQUEST = None

# Matches one double-quoted or one single-quoted string; exactly one of the
# two groups carries the text between the quotes.
_QUOTED_RE = re.compile(r"\"([^\"]*)\"|'([^']*)'")


def generate_request():
    """
    Generate raw HTTP request and set variables for global use

    Reads the global HOST_REQUEST and writes the globals USER_AGENT and
    REQUEST. The request always targets "/" and asks the server to close the
    connection, which is what lets main() read until end of stream.
    """
    global USER_AGENT
    global REQUEST
    USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:95.0) Gecko/20100101 Firefox/95.0"
    REQUEST = "GET / HTTP/1.1\r\nHost: %s\r\nConnection: close\r\nUser-Agent: %s\r\n\r\n" % (HOST_REQUEST, USER_AGENT)
    return


def pull_external_url(text):
    """
    Pulls all external references from passed parameter (Anything with "//" is assumed to be an external reference)
    :param text: Array of potential references
    :return List[str]: Array of all external references, each reduced to the
                       part after its last "//" (scheme removed)
    """
    external_ref = []
    for work in text:
        # It is assumed that all external references contain "//"
        # All internal references *should* start with a single "/"
        work_split = work.split("//")
        if len(work_split) > 1:
            external_ref.append(work_split[-1])
    return external_ref


def _quoted_strings(text):
    """Return the contents of every quoted string in text, in order."""
    return [double or single for double, single in _QUOTED_RE.findall(text)]


def pull_url(text):
    """
    Extract the reference held within quotes on a line
    :param text: String of potential link full line
    :return str: The first quoted string containing ".js"; if none does, the
                 first quoted string; "" when the line has no quoted string
    """
    quoted = _quoted_strings(text)
    for candidate in quoted:
        # Prefer the quoted value that actually is the script reference, so
        # attributes such as type="text/javascript" are not returned instead.
        if ".js" in candidate:
            return candidate
    return quoted[0] if quoted else ""


def find_ref(text):
    """
    Look for all links containing ".js" extension
    :param text: String of full HTTP response
    :return List[str]: Array of all quoted strings containing ".js", in the
                       order they appear (several per line are supported)
    """
    found_ref = []
    for line in text.split('\n'):
        if ".js" not in line:
            # If line does not contain .js continue
            continue
        # NOTE: this is a plain substring test, so names such as ".json"
        # match as well.
        found_ref.extend(ref for ref in _quoted_strings(line) if ".js" in ref)
    return found_ref


def unique_refs(links):
    """
    Count the unique references by base url
    :param links: Array of all external references
    :return int: Number of distinct base urls (text before the first "/")
    """
    # A set drops duplicate entries after stripping down to the base url
    return len({link.split("/")[0] for link in links})


def _open_connection(use_tls):
    """
    Connect to the global HOST/PORT, send REQUEST and return the socket.
    Prints an error and exits with status 1 when the connection fails.
    """
    conn = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        if use_tls:
            context = ssl.create_default_context()
            conn = context.wrap_socket(conn, server_hostname=HOST_REQUEST)
        conn.connect((HOST_REQUEST, PORT_REQUEST))
        # sendall() keeps writing until the whole request is out; send() may
        # stop after a partial write.
        conn.sendall(REQUEST.encode())
    except (OSError, ValueError):
        # OSError covers socket, DNS and TLS failures; ValueError covers
        # host names that cannot be encoded or verified.
        conn.close()
        print("Error Connecting via %s" % ("HTTPS" if use_tls else "HTTP"))
        sys.exit(1)
    return conn


def HTTP_CONNECTION():
    """
    Perform a HTTP connection to the globally referenced HOST/PORT
    :return socket: Socket to referenced HOST/PORT with REQUEST already sent
    """
    return _open_connection(False)


def HTTPS_CONNECTION():
    """
    Perform a HTTPS connection to the globally referenced HOST/PORT
    :return socket: TLS socket to referenced HOST/PORT with REQUEST already sent
    """
    return _open_connection(True)


def display_results(links):
    """
    Display results - Total unique external references, all external reference links
    :param links: All links to be displayed
    """
    print("""\
======================
   Unique External
    References: %s
======================
|   External Links   |
======================""" % unique_refs(links))
    # Numbered list of links, starting at 1
    for num, link in enumerate(links, 1):
        print("%d: %s" % (num, link))
    print()
    return


def site_parse(url=None):
    """
    Grab the site requested from the command line
    :param url: URL to parse; defaults to the first command line argument
    :return str: Host part of the requested site (scheme, path, query and
                 fragment removed), or "null" when no URL is available
    """
    if url is None:
        if len(sys.argv) < 2:
            return "null"
        url = sys.argv[1]
    # Drop the scheme: keep what follows the FIRST "//", if there is one
    remainder = url.partition("//")[2] if "//" in url else url
    # Drop path, query and fragment; only the host is used for the
    # connection and the Host header.
    for delimiter in "/?#":
        remainder = remainder.split(delimiter, 1)[0]
    return remainder


def conn_type_parse(url=None):
    """
    Return the port which the connection should be made on
    :param url: URL to inspect; defaults to the first command line argument
    :return int: 443 when the URL starts with "https://", otherwise 80
    """
    if url is None:
        url = sys.argv[1] if len(sys.argv) > 1 else "http"
    if url.lower().startswith("https://"):
        return 443
    return 80


def check_connection_succ(text):
    """
    Exit with status 1 unless the response status code is 200
    :param text: String of full HTTP response
    """
    status_line = text.split("\n")[0].strip()
    fields = status_line.split()
    # The status code is the second field of "HTTP/1.1 200 OK"; anything
    # else (including redirects and an empty response) is reported.
    if len(fields) < 2 or fields[1] != "200":
        print("Error on HTTP request")
        print("HTTP Return Code: %s" % status_line)
        sys.exit(1)
    return


def main():
    """
    Fetch the site given on the command line and list its external
    JavaScript references
    """
    global HOST_REQUEST
    global PORT_REQUEST
    if len(sys.argv) < 2:
        print("Usage: %s <url>" % sys.argv[0])
        sys.exit(1)
    HOST_REQUEST = site_parse()
    PORT_REQUEST = conn_type_parse()
    generate_request()

    # Open connection based on request type
    if PORT_REQUEST == 443:
        w = HTTPS_CONNECTION()
    else:
        w = HTTP_CONNECTION()

    # Obtain data from response; the socket is closed even if recv fails
    chunks = []
    try:
        while True:
            r = w.recv(8192)
            if not r:
                break
            chunks.append(r)
    finally:
        w.close()

    # Decode once over the whole body: a multi-byte character can be split
    # across two recv() chunks, and the page may not be valid UTF-8 at all.
    fr = b"".join(chunks).decode("utf-8", errors="replace")

    # Check return contents
    check_connection_succ(fr)

    # Gather results and display them
    js_refs = find_ref(fr)
    external_links = pull_external_url(js_refs)
    display_results(external_links)


if __name__ == "__main__":
    main()