209 lines
5.9 KiB
Python
209 lines
5.9 KiB
Python
import socket, ssl # Used for HTTP(s) socket connection
|
|
import sys # Used for system arguments
|
|
|
|
# Global Variables
# HOST_REQUEST - Host name of the remote server being connected to
# PORT_REQUEST - Port that should be connected to
# USER_AGENT   - User agent string used to spoof a browser on the HTTP request
# REQUEST      - Raw HTTP request text sent to the remote host
#
# A ``global`` statement at module level has no effect, so the names are bound
# explicitly here. main() and generate_request() assign the real values.
HOST_REQUEST = None
PORT_REQUEST = None
USER_AGENT = None
REQUEST = None
|
|
|
|
def generate_request():
    """
    Build the raw HTTP/1.1 GET request and publish it through the globals

    Writes the browser user agent string to the global USER_AGENT and the
    complete request text to the global REQUEST. The Host header is filled
    from the global HOST_REQUEST, which must be set before calling.
    """
    global USER_AGENT
    global REQUEST
    USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:95.0) Gecko/20100101 Firefox/95.0"
    # Adjacent literals are joined at compile time into one format template
    template = (
        "GET / HTTP/1.1\r\n"
        "Host: %s\r\n"
        "Connection: close\r\n"
        "Accept-Encoding: Identity\r\n"
        "User-Agent: %s\r\n"
        "\r\n"
    )
    REQUEST = template % (HOST_REQUEST, USER_AGENT)
|
|
|
|
def pull_external_url(text):
    """
    Pulls all external references from passed parameter (Anything with "//" is assumed to be an external reference)

    The part returned for each reference is everything after the FIRST "//",
    so the host stays at the front even when the path itself contains "//".

    :param text: Array of potential references
    :return List[str]: Array of all external references, scheme and "//" removed
    """
    external_ref = []
    for candidate in text:
        # It is assumed that all external references contain "//"
        # All internal references *should* start with a single "/"
        _, marker, remainder = candidate.partition("//")
        if marker:
            external_ref.append(remainder)
    return external_ref
|
|
|
|
def pull_url(text):
    """
    Extract the first quoted value found on a line

    Double quotes are tried first, then single quotes. If the opening quote
    has no matching closing quote, the rest of the line (whitespace stripped)
    is returned. If the line holds no quote at all, the stripped line itself
    is returned.

    :param text: String of potential link full line
    :return str: String of potential stripped link
    """
    for quote in ('"', "'"):
        url_start = text.find(quote)
        if url_start == -1:
            # This quote style is not used on the line, try the next one
            continue
        url_start += 1
        url_end = text.find(quote, url_start)
        if url_end == -1:
            # Unterminated quote: slicing with -1 would drop the last character
            return text[url_start:].strip()
        return text[url_start:url_end]
    return text.strip()
|
|
|
|
def find_ref(text):
    """
    Look for all links containing ".js" extension

    The response is scanned line by line. For every line that contains the
    substring ".js", the value returned by pull_url() for that line is kept.

    :param text: String of full HTTP response
    :return List[str]: Array of all links to .js extensions
    """
    # A membership test replaces the index()/except control flow, which also
    # hid any unrelated error raised while extracting the link
    return [pull_url(line) for line in text.split("\n") if ".js" in line]
|
|
|
|
def unique_refs(links):
    """
    Count the distinct base urls among the given references

    The base url of a reference is the text before its first "/".

    :param links: Array of all external references
    :return int: Number of unique base urls
    """
    # A set keeps one entry per base url, so its size is the unique count
    hosts = {reference.split("/")[0] for reference in links}
    return len(hosts)
|
|
|
|
def HTTP_CONNECTION():
    """
    Perform a HTTP connection to the globally referenced HOST/PORT

    Connects to (HOST_REQUEST, PORT_REQUEST) and sends the global REQUEST.
    On a connection error the socket is closed, a message is printed and the
    program exits with status 1.

    :return socket: Socket to referenced HOST/PORT, request already sent
    """
    w = None
    try:
        # Create request socket
        w = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        w.connect((HOST_REQUEST, PORT_REQUEST))
        # sendall() keeps writing until the whole request has been sent;
        # send() may stop after only part of it
        w.sendall(REQUEST.encode())
    except (OSError, ValueError):
        # OSError covers socket and DNS failures, ValueError covers host
        # names that cannot be encoded
        if w is not None:
            w.close()
        print("Error Connecting via HTTP")
        sys.exit(1)
    return w
|
|
|
|
def HTTPS_CONNECTION():
    """
    Perform a HTTPS connection to the globally referenced HOST/PORT

    Wraps a TCP socket with the default SSL context, connects to
    (HOST_REQUEST, PORT_REQUEST) and sends the global REQUEST. On a
    connection or TLS error the socket is closed, a message is printed and
    the program exits with status 1.

    :return socket: TLS wrapped socket to referenced HOST/PORT, request already sent
    """
    open_socket = None
    try:
        # Create request socket
        context = ssl.create_default_context()
        open_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s_w = context.wrap_socket(open_socket, server_hostname=HOST_REQUEST)
        # From here on the wrapped socket is the one that must be closed
        open_socket = s_w
        s_w.connect((HOST_REQUEST, PORT_REQUEST))
        # sendall() keeps writing until the whole request has been sent;
        # send() may stop after only part of it
        s_w.sendall(REQUEST.encode())
    except (OSError, ValueError):
        # OSError covers socket, DNS and SSL failures (ssl.SSLError is a
        # subclass), ValueError covers unusable host names
        if open_socket is not None:
            open_socket.close()
        print("Error Connecting via HTTPS")
        sys.exit(1)
    return s_w
|
|
|
|
def display_results(links):
    """
    Print the result banner followed by a numbered list of the links

    The banner shows the count returned by unique_refs(links). Each link is
    then printed on its own line, numbered from 1, and a blank line ends the
    output.

    :param links: All links to be displayed
    """
    banner = """\
======================
Unique External
References: %s
======================
| External Links |
======================""" % unique_refs(links)
    print(banner)
    # enumerate supplies the running number for the list
    for position, link in enumerate(links, start=1):
        print("%d: %s" % (position, link))
    print()
|
|
|
|
def site_parse():
    """
    Grab the site requested from the command line

    Reads the first command line argument, removes a leading "scheme://" (or
    a bare leading "//") and cuts off anything from the first "/", "?" or "#"
    so that only the host part is left.

    :return str: String of requested site, or "null" when no argument was given
    """
    try:
        target = sys.argv[1]
    except IndexError:
        # No site was passed on the command line
        return "null"

    _, separator, remainder = target.partition("://")
    if separator:
        host = remainder
    elif target.startswith("//"):
        host = target[2:]
    else:
        host = target

    # A trailing path, query or fragment is not part of the host name and
    # would make the address lookup fail
    for delimiter in "/?#":
        host = host.split(delimiter, 1)[0]
    return host
|
|
|
|
def conn_type_parse():
    """
    Return the port which the connection should be made on

    The port is 443 when the first command line argument has an explicit
    "https://" scheme (compared case-insensitively), otherwise 80. A missing
    argument also yields 80.

    :return int: Port which connection should be made on
    """
    conn_port = 80
    try:
        target = sys.argv[1]
    except IndexError:
        # No argument given, keep the plain HTTP default
        return conn_port

    # Only text in front of "://" is a scheme; a host name that merely starts
    # with "https" must not switch the connection to TLS
    scheme, separator, _ = target.partition("://")
    if separator and scheme.lower() == "https":
        conn_port = 443
    return conn_port
|
|
|
|
def check_connection_succ(text):
    """
    Exits program if any HTTP response code other than 200 is met

    Only the status code field of the first response line is compared, so a
    "200" that appears elsewhere in that line does not count as success.

    :param text: full HTTP response
    """
    # If the website is using HTTP/2 and has no HTTP/1.1 options
    # you will receive an error here since it returns a 301 code
    status_line = text.split("\n", 1)[0].strip()
    fields = status_line.split()
    # A status line reads "<version> <code> <reason>", the code is field two
    status_code = fields[1] if len(fields) > 1 else ""
    if status_code != "200":
        print("Error on HTTP request")
        print("HTTP Return Code: %s" % status_line)
        sys.exit(1)
|
|
|
|
def main():
    """
    Fetch the site named on the command line and list its external .js links

    Sets the globals HOST_REQUEST and PORT_REQUEST from the command line,
    builds the request, reads the whole response, checks the status code and
    prints the external references found in it.
    """
    global HOST_REQUEST
    global PORT_REQUEST
    HOST_REQUEST = site_parse()
    PORT_REQUEST = conn_type_parse()
    generate_request()

    # Open connection based on request type
    if PORT_REQUEST == 443:
        w = HTTPS_CONNECTION()
    else:
        w = HTTP_CONNECTION()

    # Obtain data from response
    chunks = []
    try:
        while True:
            r = w.recv(32768)
            if not r:
                break
            chunks.append(r)
    finally:
        # Close socket even when reading fails
        w.close()

    # Decode once after joining: a multi-byte character can be split across
    # two recv() calls, and undecodable bytes should not abort the scan
    fr = b"".join(chunks).decode(errors="replace")

    # Check return contents
    check_connection_succ(fr)

    # Gather results and display them
    js_refs = find_ref(fr)
    external_links = pull_external_url(js_refs)
    display_results(external_links)
|
|
|
|
# Run the scan only when the file is executed as a script
if __name__ == "__main__":
    main()
|
|
|