"""Fetch a web page over a raw socket and list the external ".js" references in it.

The target URL is read from the first command-line argument, e.g.
``http://example.com`` or ``https://example.com``.
"""

import re      # Used to extract quoted strings from the response
import socket  # Used for HTTP(s) socket connection
import ssl     # Used for HTTP(s) socket connection
import sys     # Used for system arguments

# Global Variables
# HOST_REQUEST - Used to define what host is being connected to
# PORT_REQUEST - Used to define what port should be connected to
# USER_AGENT   - Used to spoof the requesting user agent on the HTTP request
# REQUEST      - Used to define the HTTP request sent to the remote host
#
# These are placeholders; main() and generate_request() assign the real values.
HOST_REQUEST = None
PORT_REQUEST = None
USER_AGENT = None
REQUEST = None

# Matches one double-quoted or one single-quoted string; exactly one of the
# two capture groups is non-empty for a non-empty quoted string.
_QUOTED_STRING = re.compile(r'"([^"]*)"|\'([^\']*)\'')


def generate_request():
    """
    Generate raw HTTP request and set variables for global use

    Reads the global HOST_REQUEST and writes the globals USER_AGENT and REQUEST.
    """
    global USER_AGENT
    global REQUEST

    USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:95.0) Gecko/20100101 Firefox/95.0"
    REQUEST = "GET / HTTP/1.1\r\nHost: %s\r\nConnection: close\r\nAccept-Encoding: Identity\r\nUser-Agent: %s\r\n\r\n" % (HOST_REQUEST, USER_AGENT)


def pull_external_url(text):
    """
    Pulls all external references from passed parameter
    (Anything with "//" is assumed to be an external reference)

    :param text: Array of potential references
    :return List[str]: Array of all external references, with everything up
                       to and including the first "//" removed
    """
    external_ref = []
    for reference in text:
        # It is assumed that all external references contain "//"
        # All internal references *should* start with a single "/"
        _, separator, remainder = reference.partition("//")
        if separator:
            # Split on the FIRST "//" only, so a URL embedded later in the
            # path or query string does not replace the real host.
            external_ref.append(remainder)
    return external_ref


def pull_url(text):
    """
    Extract the first quoted string from a line

    Double quotes are preferred; single quotes are only considered when the
    line contains no double quote at all.

    :param text: String of potential link full line
    :return str: The text between the first pair of quotes, the rest of the
                 line if the quote is never closed, or "" if there is no quote
    """
    for quote in ("\"", "'"):
        opening = text.find(quote)
        if opening == -1:
            continue
        url_start = opening + 1
        url_end = text.find(quote, url_start)
        if url_end == -1:
            # Unterminated quote: keep everything after the opening quote.
            return text[url_start:]
        return text[url_start:url_end]
    return ""


def find_ref(text):
    """
    Look for all quoted links containing ".js"

    Every quoted string on a line is inspected, so a line carrying several
    attributes (or several script tags) yields each matching link.

    :param text: String of full HTTP response
    :return List[str]: Array of all quoted strings containing ".js", in order
    """
    found_ref = []
    for line in text.split("\n"):
        if ".js" not in line:
            continue
        for double_quoted, single_quoted in _QUOTED_STRING.findall(line):
            candidate = double_quoted or single_quoted
            if ".js" in candidate:
                found_ref.append(candidate)
    return found_ref


def unique_refs(links):
    """
    Count the unique references by base url

    :param links: Array of all external references (scheme already removed)
    :return int: Number of distinct base urls (text before the first "/")
    """
    # A set drops duplicate entries after stripping down to the base url
    return len({link.split("/")[0] for link in links})


def HTTP_CONNECTION():
    """
    Perform a HTTP connection to the globally referenced HOST/PORT and send
    the global REQUEST. Prints an error and exits with status 1 on failure.

    :return socket: Socket to referenced HOST/PORT
    """
    # Create request socket
    w = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        w.connect((HOST_REQUEST, PORT_REQUEST))
        # sendall keeps writing until the whole request has been transmitted
        w.sendall(REQUEST.encode())
    except (OSError, ValueError):
        # OSError covers resolution/connection failures; ValueError covers
        # host names that cannot be encoded.
        w.close()
        print("Error Connecting via HTTP")
        sys.exit(1)
    return w


def HTTPS_CONNECTION():
    """
    Perform a HTTPS connection to the globally referenced HOST/PORT and send
    the global REQUEST. Prints an error and exits with status 1 on failure.

    :return socket: TLS-wrapped socket to referenced HOST/PORT
    """
    # Create request socket
    s_w = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        context = ssl.create_default_context()
        # Rebind to the wrapped socket so the except branch always closes
        # whichever socket object currently owns the connection.
        s_w = context.wrap_socket(s_w, server_hostname=HOST_REQUEST)
        s_w.connect((HOST_REQUEST, PORT_REQUEST))
        s_w.sendall(REQUEST.encode())
    except (OSError, ValueError):
        # ssl.SSLError is an OSError; ValueError covers an unusable host name.
        s_w.close()
        print("Error Connecting via HTTPS")
        sys.exit(1)
    return s_w


def display_results(links):
    """
    Display results - Total unique external references, all external reference links

    :param links: All links to be displayed
    """
    print("""\
======================
Unique External References: %s
======================
| External Links |
======================""" % unique_refs(links))
    # Numbered list of links, starting at 1
    for num, link in enumerate(links, start=1):
        print("%d: %s" % (num, link))
    print()


def site_parse():
    """
    Grab the site requested from the command line

    The scheme ("http://", "https://") and any path after the host are removed.

    :return str: Host name of the requested site, or "null" if no argument was given
    """
    try:
        target = sys.argv[1]
    except IndexError:
        return "null"
    _, separator, remainder = target.partition("//")
    without_scheme = remainder if separator else target
    # Only the host part can be resolved; drop any trailing path.
    return without_scheme.split("/", 1)[0]


def conn_type_parse():
    """
    Return the port which the connection should be made on

    :return int: 443 when the command-line argument starts with "https"
                 (case-insensitive), otherwise 80
    """
    try:
        conn_type = sys.argv[1].split("//")[0]
    except IndexError:
        conn_type = "http"
    if conn_type.lower().startswith("https"):
        return 443
    return 80


def check_connection_succ(text):
    """
    Exits program if any HTTP response code other than 200 is met

    :param text: full HTTP response
    """
    # Redirects are not followed, so a site answering with e.g. 301
    # is reported as an error here.
    status_line = text.split("\n", 1)[0].strip()
    fields = status_line.split()
    # Status line layout: "<version> <code> <reason>"
    status_code = fields[1] if len(fields) > 1 else ""
    if status_code != "200":
        print("Error on HTTP request")
        print("HTTP Return Code: %s" % status_line)
        sys.exit(1)


def main():
    """
    Request the site given on the command line and print the external ".js"
    references found in the response.
    """
    global HOST_REQUEST
    global PORT_REQUEST

    HOST_REQUEST = site_parse()
    PORT_REQUEST = conn_type_parse()
    generate_request()

    # Open connection based on request type
    if PORT_REQUEST == 443:
        w = HTTPS_CONNECTION()
    else:
        w = HTTP_CONNECTION()

    # Obtain data from response; the socket is closed even if recv fails
    chunks = []
    with w:
        while True:
            r = w.recv(32768)
            if not r:
                break
            chunks.append(r)

    # Decode once, after reassembly, so a multi-byte character split across
    # two reads is not corrupted; undecodable bytes are replaced, not fatal.
    fr = b"".join(chunks).decode("utf-8", errors="replace")

    # Check return contents
    check_connection_succ(fr)

    # Gather results and display them
    js_refs = find_ref(fr)
    external_links = pull_external_url(js_refs)
    display_results(external_links)


if __name__ == "__main__":
    main()