'(/^00[1-9](?=\-?[0-9][1-9](?=\-?[0-9][0-9][0-9][1-9]))|00[1-9](?=\-?[0-9][1-9](?=\-?[0-9][0-9][1-9][0-9]))|00[1-9](?=\-?[0-9][1-9](?=\-?[0-9][1-9][0-9][0-9]))|00[1-9](?=\-?[0-9][1-9](?=\-?[1-9][0-9][0-9][0-9]))|00[1-9](?=\-?[1-9][0-9](?=\-?[0-9][0-9][0-9][1-9]))|00[1-9](?=\-?[1-9][0-9](?=\-?[0-9][0-9][1-9][0-9]))|00[1-9](?=\-?[1-9][0-9](?=\-?[0-9][1-9][0-9][0-9]))|00[1-9](?=\-?[1-9][0-9](?=\-?[1-9][0-9][0-9][0-9]))|0[1-9][0-9](?=\-?[0-9][1-9](?=\-?[0-9][0-9][0-9][1-9]))|0[1-9][0-9](?=\-?[0-9][1-9](?=\-?[0-9][0-9][1-9][0-9]))|0[1-9][0-9](?=\-?[0-9][1-9](?=\-?[0-9][1-9][0-9][0-9]))|0[1-9][0-9](?=\-?[0-9][1-9](?=\-?[1-9][0-9][0-9][0-9]))|0[1-9][0-9](?=\-?[1-9][0-9](?=\-?[0-9][0-9][0-9][1-9]))|0[1-9][0-9](?=\-?[1-9][0-9](?=\-?[0-9][0-9][1-9][0-9]))|0[1-9][0-9](?=\-?[1-9][0-9](?=\-?[0-9][1-9][0-9][0-9]))|0[1-9][0-9](?=\-?[1-9][0-9](?=\-?[1-9][0-9][0-9][0-9]))|[1-5][0-9][0-9](?=\-?[0-9][1-9](?=\-?[0-9][0-9][0-9][1-9]))|[1-5][0-9][0-9](?=\-?[0-9][1-9](?=\-?[0-9][0-9][1-9][0-9]))|[1-5][0-9][0-9](?=\-?[0-9][1-9](?=\-?[0-9][1-9][0-9][0-9]))|[1-5][0-9][0-9](?=\-?[0-9][1-9](?=\-?[1-9][0-9][0-9][0-9]))|[1-5][0-9][0-9](?=\-?[1-9][0-9](?=\-?[0-9][0-9][0-9][1-9]))|[1-5][0-9][0-9](?=\-?[1-9][0-9](?=\-?[0-9][0-9][1-9][0-9]))|[1-5][0-9][0-9](?=\-?[1-9][0-9](?=\-?[0-9][1-9][0-9][0-9]))|[1-5][0-9][0-9](?=\-?[1-9][0-9](?=\-?[1-9][0-9][0-9][0-9]))|6[0-5][0-9](?=\-?[0-9][1-9](?=\-?[0-9][0-9][0-9][1-9]))|6[0-5][0-9](?=\-?[0-9][1-9](?=\-?[0-9][0-9][1-9][0-9]))|6[0-5][0-9](?=\-?[0-9][1-9](?=\-?[0-9][1-9][0-9][0-9]))|6[0-5][0-9](?=\-?[0-9][1-9](?=\-?[1-9][0-9][0-9][0-9]))|6[0-5][0-9](?=\-?[1-9][0-9](?=\-?[0-9][0-9][0-9][1-9]))|6[0-5][0-9](?=\-?[1-9][0-9](?=\-?[0-9][0-9][1-9][0-9]))|6[0-5][0-9](?=\-?[1-9][0-9](?=\-?[0-9][1-9][0-9][0-9]))|6[0-5][0-9](?=\-?[1-9][0-9](?=\-?[1-9][0-9][0-9][0-9]))|66[0-5](?=\-?[0-9][1-9](?=\-?[0-9][0-9][0-9][1-9]))|66[0-5](?=\-?[0-9][1-9](?=\-?[0-9][0-9][1-9][0-9]))|66[0-5](?=\-?[0-9][1-9](?=\-?[0-9][1-9][0-9][0-9]))|66[0-5](?=\-?[0-9][1-9](?=\-?[1-9][0-9][0-9][0-9
]))|66[0-5](?=\-?[1-9][0-9](?=\-?[0-9][0-9][0-9][1-9]))|66[0-5](?=\-?[1-9][0-9](?=\-?[0-9][0-9][1-9][0-9]))|66[0-5](?=\-?[1-9][0-9](?=\-?[0-9][1-9][0-9][0-9]))|66[0-5](?=\-?[1-9][0-9](?=\-?[1-9][0-9][0-9][0-9]))|66[7-9](?=\-?[0-9][1-9](?=\-?[0-9][0-9][0-9][1-9]))|66[7-9](?=\-?[0-9][1-9](?=\-?[0-9][0-9][1-9][0-9]))|66[7-9](?=\-?[0-9][1-9](?=\-?[0-9][1-9][0-9][0-9]))|66[7-9](?=\-?[0-9][1-9](?=\-?[1-9][0-9][0-9][0-9]))|66[7-9](?=\-?[1-9][0-9](?=\-?[0-9][0-9][0-9][1-9]))|66[7-9](?=\-?[1-9][0-9](?=\-?[0-9][0-9][1-9][0-9]))|66[7-9](?=\-?[1-9][0-9](?=\-?[0-9][1-9][0-9][0-9]))|66[7-9](?=\-?[1-9][0-9](?=\-?[1-9][0-9][0-9][0-9]))|6[7-9][0-9](?=\-?[0-9][1-9](?=\-?[0-9][0-9][0-9][1-9]))|6[7-9][0-9](?=\-?[0-9][1-9](?=\-?[0-9][0-9][1-9][0-9]))|6[7-9][0-9](?=\-?[0-9][1-9](?=\-?[0-9][1-9][0-9][0-9]))|6[7-9][0-9](?=\-?[0-9][1-9](?=\-?[1-9][0-9][0-9][0-9]))|6[7-9][0-9](?=\-?[1-9][0-9](?=\-?[0-9][0-9][0-9][1-9]))|6[7-9][0-9](?=\-?[1-9][0-9](?=\-?[0-9][0-9][1-9][0-9]))|6[7-9][0-9](?=\-?[1-9][0-9](?=\-?[0-9][1-9][0-9][0-9]))|6[7-9][0-9](?=\-?[1-9][0-9](?=\-?[1-9][0-9][0-9][0-9]))|[7-8][0-9][0-9](?=\-?[0-9][1-9](?=\-?[0-9][0-9][0-9][1-9]))|[7-8][0-9][0-9](?=\-?[0-9][1-9](?=\-?[0-9][0-9][1-9][0-9]))|[7-8][0-9][0-9](?=\-?[0-9][1-9](?=\-?[0-9][1-9][0-9][0-9]))|[7-8][0-9][0-9](?=\-?[0-9][1-9](?=\-?[1-9][0-9][0-9][0-9]))|[7-8][0-9][0-9](?=\-?[1-9][0-9](?=\-?[0-9][0-9][0-9][1-9]))|[7-8][0-9][0-9](?=\-?[1-9][0-9](?=\-?[0-9][0-9][1-9][0-9]))|[7-8][0-9][0-9](?=\-?[1-9][0-9](?=\-?[0-9][1-9][0-9][0-9]))|[7-8][0-9][0-9](?=\-?[1-9][0-9](?=\-?[1-9][0-9][0-9][0-9]))$'
'(/^00[1-9](?=\\-?[0-9][1-9](?=\\-?[0-9][0-9][0-9][1-9]))|00[1-9](?=\\-?[0-9][1-9](?=\\-?[0-9][0-9][1-9][0-9]))|00[1-9](?=\\-?[0-9][1-9](?=\\-?[0-9][1-9][0-9][0-9]))|00[1-9](?=\\-?[0-9][1-9](?=\\-?[1-9][0-9][0-9][0-9]))|00[1-9](?=\\-?[1-9][0-9](?=\\-?[0-9][0-9][0-9][1-9]))|00[1-9](?=\\-?[1-9][0-9](?=\\-?[0-9][0-9][1-9][0-9]))|00[1-9](?=\\-?[1-9][0-9](?=\\-?[0-9][1-9][0-9][0-9]))|00[1-9](?=\\-?[1-9][0-9](?=\\-?[1-9][0-9][0-9][0-9]))|0[1-9][0-9](?=\\-?[0-9][1-9](?=\\-?[0-9][0-9][0-9][1-9]))|0[1-9][0-9](?=\\-?[0-9][1-9](?=\\-?[0-9][0-9][1-9][0-9]))|0[1-9][0-9](?=\\-?[0-9][1-9](?=\\-?[0-9][1-9][0-9][0-9]))|0[1-9][0-9](?=\\-?[0-9][1-9](?=\\-?[1-9][0-9][0-9][0-9]))|0[1-9][0-9](?=\\-?[1-9][0-9](?=\\-?[0-9][0-9][0-9][1-9]))|0[1-9][0-9](?=\\-?[1-9][0-9](?=\\-?[0-9][0-9][1-9][0-9]))|0[1-9][0-9](?=\\-?[1-9][0-9](?=\\-?[0-9][1-9][0-9][0-9]))|0[1-9][0-9](?=\\-?[1-9][0-9](?=\\-?[1-9][0-9][0-9][0-9]))|[1-5][0-9][0-9](?=\\-?[0-9][1-9](?=\\-?[0-9][0-9][0-9][1-9]))|[1-5][0-9][0-9](?=\\-?[0-9][1-9](?=\\-?[0-9][0-9][1-9][0-9]))|[1-5][0-9][0-9](?=\\-?[0-9][1-9](?=\\-?[0-9][1-9][0-9][0-9]))|[1-5][0-9][0-9](?=\\-?[0-9][1-9](?=\\-?[1-9][0-9][0-9][0-9]))|[1-5][0-9][0-9](?=\\-?[1-9][0-9](?=\\-?[0-9][0-9][0-9][1-9]))|[1-5][0-9][0-9](?=\\-?[1-9][0-9](?=\\-?[0-9][0-9][1-9][0-9]))|[1-5][0-9][0-9](?=\\-?[1-9][0-9](?=\\-?[0-9][1-9][0-9][0-9]))|[1-5][0-9][0-9](?=\\-?[1-9][0-9](?=\\-?[1-9][0-9][0-9][0-9]))|6[0-5][0-9](?=\\-?[0-9][1-9](?=\\-?[0-9][0-9][0-9][1-9]))|6[0-5][0-9](?=\\-?[0-9][1-9](?=\\-?[0-9][0-9][1-9][0-9]))|6[0-5][0-9](?=\\-?[0-9][1-9](?=\\-?[0-9][1-9][0-9][0-9]))|6[0-5][0-9](?=\\-?[0-9][1-9](?=\\-?[1-9][0-9][0-9][0-9]))|6[0-5][0-9](?=\\-?[1-9][0-9](?=\\-?[0-9][0-9][0-9][1-9]))|6[0-5][0-9](?=\\-?[1-9][0-9](?=\\-?[0-9][0-9][1-9][0-9]))|6[0-5][0-9](?=\\-?[1-9][0-9](?=\\-?[0-9][1-9][0-9][0-9]))|6[0-5][0-9](?=\\-?[1-9][0-9](?=\\-?[1-9][0-9][0-9][0-9]))|66[0-5](?=\\-?[0-9][1-9](?=\\-?[0-9][0-9][0-9][1-9]))|66[0-5](?=\\-?[0-9][1-9](?=\\-?[0-9][0-9][1-9][0-9]))|66[0-5](?=\\-?[0-9][1-9](?=\\-?[
0-9][1-9][0-9][0-9]))|66[0-5](?=\\-?[0-9][1-9](?=\\-?[1-9][0-9][0-9][0-9]))|66[0-5](?=\\-?[1-9][0-9](?=\\-?[0-9][0-9][0-9][1-9]))|66[0-5](?=\\-?[1-9][0-9](?=\\-?[0-9][0-9][1-9][0-9]))|66[0-5](?=\\-?[1-9][0-9](?=\\-?[0-9][1-9][0-9][0-9]))|66[0-5](?=\\-?[1-9][0-9](?=\\-?[1-9][0-9][0-9][0-9]))|66[7-9](?=\\-?[0-9][1-9](?=\\-?[0-9][0-9][0-9][1-9]))|66[7-9](?=\\-?[0-9][1-9](?=\\-?[0-9][0-9][1-9][0-9]))|66[7-9](?=\\-?[0-9][1-9](?=\\-?[0-9][1-9][0-9][0-9]))|66[7-9](?=\\-?[0-9][1-9](?=\\-?[1-9][0-9][0-9][0-9]))|66[7-9](?=\\-?[1-9][0-9](?=\\-?[0-9][0-9][0-9][1-9]))|66[7-9](?=\\-?[1-9][0-9](?=\\-?[0-9][0-9][1-9][0-9]))|66[7-9](?=\\-?[1-9][0-9](?=\\-?[0-9][1-9][0-9][0-9]))|66[7-9](?=\\-?[1-9][0-9](?=\\-?[1-9][0-9][0-9][0-9]))|6[7-9][0-9](?=\\-?[0-9][1-9](?=\\-?[0-9][0-9][0-9][1-9]))|6[7-9][0-9](?=\\-?[0-9][1-9](?=\\-?[0-9][0-9][1-9][0-9]))|6[7-9][0-9](?=\\-?[0-9][1-9](?=\\-?[0-9][1-9][0-9][0-9]))|6[7-9][0-9](?=\\-?[0-9][1-9](?=\\-?[1-9][0-9][0-9][0-9]))|6[7-9][0-9](?=\\-?[1-9][0-9](?=\\-?[0-9][0-9][0-9][1-9]))|6[7-9][0-9](?=\\-?[1-9][0-9](?=\\-?[0-9][0-9][1-9][0-9]))|6[7-9][0-9](?=\\-?[1-9][0-9](?=\\-?[0-9][1-9][0-9][0-9]))|6[7-9][0-9](?=\\-?[1-9][0-9](?=\\-?[1-9][0-9][0-9][0-9]))|[7-8][0-9][0-9](?=\\-?[0-9][1-9](?=\\-?[0-9][0-9][0-9][1-9]))|[7-8][0-9][0-9](?=\\-?[0-9][1-9](?=\\-?[0-9][0-9][1-9][0-9]))|[7-8][0-9][0-9](?=\\-?[0-9][1-9](?=\\-?[0-9][1-9][0-9][0-9]))|[7-8][0-9][0-9](?=\\-?[0-9][1-9](?=\\-?[1-9][0-9][0-9][0-9]))|[7-8][0-9][0-9](?=\\-?[1-9][0-9](?=\\-?[0-9][0-9][0-9][1-9]))|[7-8][0-9][0-9](?=\\-?[1-9][0-9](?=\\-?[0-9][0-9][1-9][0-9]))|[7-8][0-9][0-9](?=\\-?[1-9][0-9](?=\\-?[0-9][1-9][0-9][0-9]))|[7-8][0-9][0-9](?=\\-?[1-9][0-9](?=\\-?[1-9][0-9][0-9][0-9]))$'
It's not just a language; it also runs inside other languages
Whitespace Free
Many cups of coffee per character
<input pattern="(java|ecma)script" required>
<(.*?)>
SELECT * FROM YourDatabase WHERE YourField REGEXP 'ty$';
url(r'^post/detail/(?P<slug>[a-z_A-Z]+)$', home),
Quantifiers *, +, ?
Character classes [abc], [a-z], [^a-z] and \w
Groups a(bc)+
Anchors ^ and $
\w+@[\w_-]+\.[a-z]{2,}
(\+\d{1,2})?(-|\()?(\d{10}|(\d{3}[\-|\)]{0,2}){2}\d{4})$
import re  # needed in this cell: the recorded run failed with NameError: name 're' is not defined

# Verbose-mode (re.VERBOSE) phone-number pattern: whitespace and `#` comments inside
# the pattern are ignored by the regex engine, so it can be annotated line by line.
# The separator class is [\-\)] (hyphen or close paren, as its comment says); the
# earlier [\-|\)] also accepted a literal "|", because "|" is not alternation inside
# a character class.
# There is no leading ^ anchor, so with .search() the match may start mid-string;
# only the end of the input is anchored by $.
p = re.compile(r"""
(\+\d{1,2})? # Optional Country Code +1 or perhaps +22
(-|\()? # Optional hyphen or open parenths for the area code.
(\d{10} # Try and match ten digits exactly
| # Else
(\d{3} # Match three digits
[\-\)]{0,2}) # Hypen or close parenths zero through 2 times (to account for -))
{2} # Repeat to match the next three digits
\d{4}) # The remaining four digits
$ # Ensure nothing else follows
""", re.VERBOSE)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-2-c4686f28b6d7> in <module>() ----> 1 p = re.compile(r""" 2 3 (\+\d{1,2})? # Optional Country Code +1 or perhaps +22 4 (-|\()? # Optional hyphen or open parenths for the area code. 5 (\d{10} # Try and match ten digits exactly NameError: name 're' is not defined
$ tree -h -I '*.pyc|__pycache__'
- Getting a clean look at the file tree
$ cat syslog auth.log | grep -E '((12[89]|1[3-9][0-9]|2[0-4][0-9]|25[0-5])\.){3}(1[0-9]{2}|2[0-4][0-9]|25[0-5])'
- Searching for certain IPv4 addresses
from bs4 import BeautifulSoup
import requests
import re
def get_speech_links():
    """Scrape the Miller Center speeches index and collect links to speeches.

    Downloads ``base_url``, parses it with BeautifulSoup's ``html.parser`` and
    keeps the ``href`` of every ``<a>`` tag whose ``href`` matches the verbose
    pattern below (``word/word/speeches/...``).

    Returns:
        set: the distinct matching ``href`` strings (duplicates collapse).

    Raises:
        requests.RequestException: if the request times out, the connection
            fails, or the server answers with an HTTP error status.
    """
    base_url = 'http://millercenter.org/president/speeches/'
    # Bound the wait and fail loudly on 4xx/5xx instead of hanging forever or
    # silently scraping an error page.
    r = requests.get(base_url, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')
    # re.VERBOSE ignores the whitespace and `#` comments inside the pattern,
    # so the indentation here does not change what is matched.
    speech_href = re.compile(r"""
        \w+ # One or more word characters
        / # A forward slash
        \w+ # One or more word characters
        /speeches/ # Literally
        .* # Any number of characters, or none
        $ # Ensure the end of the string is met, no spaces allowed!
        """, re.VERBOSE)
    little_soup = soup.find_all("a", {"href": speech_href})
    links = {a['href'] for a in little_soup}
    return links
# Materialise the returned set as a list. The bare `links` expression on the last
# line is what displays the value when this runs as an interactive (IPython) cell.
links = list(get_speech_links())
links
^\s*\S+(?:\s+\S+){4,}\s*$
# Read the entire text file into the string `iliad`, then print a sample slice of it.
with open('iliad.txt', 'r') as iliad_file:
    iliad = iliad_file.read()
print(iliad[10052:10475])
# Count "words" in `iliad` two ways and print both totals for comparison.
# `iliad` is expected to be a str defined earlier (it is passed to findall and
# has .split() called on it below).
pattern = re.compile(r'''
\s* # A Space Character, or not.
(\w+) # One or more words Characters
\s* # Another Space Character, or not.
''', re.X)
# With a single capture group, findall returns the list of captured \w+ runs.
matches = pattern.findall(iliad)
print("With Reg Ex:", len(matches))
# str.split() with no argument splits on runs of whitespace only, so punctuation
# stays attached to its token; the two counts are therefore not expected to agree.
print("With the python split method:", len(iliad.split()))
# Search `iliad` for text containing the whitespace-delimited word "war" and
# report how many matches were found.
# No re.S flag is given, so `.` does not match a newline: each `.+` stays on one
# line, and only the `\s+` runs around "war" can cross a line break.
# NOTE(review): `.+` is greedy, so a match runs to the last period the trailing
# `.+\.` can reach on its line, which is not necessarily a single full sentence.
pattern = re.compile(r'''
# Matches full sentences pertaining to war.
.+ # Any Character one or more times.
\s+ # A white space character, one or more times
war # The string 'war'
\s+ # A white space character, one or more times
.+ # Any Character one or more times.
\. # A Period, literally.
''', re.X)
# The pattern has no capture groups, so findall returns the full matched strings.
matches = pattern.findall(iliad)
print("With Reg Ex:", len(matches))
https://developer.mozilla.org/en-US/docs/Web/Guide/HTML/HTML5/Constraint_validation
(\+\d{1,2})?(-|\()?(\d{10}|(\d{3}[\-|\)]{0,2}){2}\d{4})$
^[a-z A-Z]{1,}\S$
\w+@\w+\.[a-z]{2,}
\w+@[\w_-]+\.[a-z]{2,}
%%html
<!-- Zip-code field: the pattern constraint accepts exactly five digits. -->
<input name="zipcode" placeholder="zipcode" pattern="\d{5}">
%%html
<!-- Constraint-validation demo: the required input is valid only when its whole
     value is "javascript" or "ecmascript"; the CSS below colours the border
     according to the field's validity state. -->
<label>Type 'javascript' or 'ecmascript'</label>
<input pattern="(java|ecma)script" required>
<button>Submit</button>
<style>
  /* Red border while the value fails the pattern/required constraints. */
  input:invalid {
    border: 1px solid red;
  }

  /* Green border once the value satisfies them. */
  input:valid {
    border: 1px solid green;
  }
</style>
%%html
<!-- Contact-form demo: the browser checks each required field against its pattern
     attribute (the whole value must match), and the CSS below colours the border
     according to the field's validity state. -->
<input type="text" placeholder="Name" pattern="^[a-z A-Z]{1,}\S$" required><br>
<!-- Phone: the separator class is [\-\)] (hyphen or close paren). The previous
     [\-|\)] also admitted a literal "|", and an unescaped "|" inside a class is
     rejected when the pattern is compiled with the RegExp "v" flag. -->
<input type="text" placeholder="Phone Number" pattern='(\+\d{1,2})?(-|\()?(\d{10}|(\d{3}[\-\)]{0,2}){2}\d{4})$' required><br>
<!-- E-mail: the domain label may now be one or more word characters or hyphens;
     the previous \w allowed exactly one character, so "user@example.com" failed. -->
<input type="text" placeholder="E-mail" pattern='\w+@[\w\-]+\.[a-z]{2,}' required><br>
<button>Done</button>
<style>
  /* Red border while a field fails its constraints. */
  input:invalid {
    border: 1px solid red;
  }

  /* Green border once a field satisfies them. */
  input:valid {
    border: 1px solid green;
  }
</style>
Python RegEx "Named Groups" ----> Parameter Capturing
# URL routes for the demo project.
urlpatterns = [
    url(r'^admin/', admin.site.urls),
    # The named group (?P<slug>...) is handed to the view as the keyword argument `slug`.
    # The class is [a-z_A-Z]: the earlier range A-z spans ASCII 65-122 and therefore
    # also matched the characters [ \ ] ^ and ` that sit between "Z" and "a".
    url(r'^post/detail/(?P<slug>[a-z_A-Z]+)$', home),
]
from django.shortcuts import render, get_object_or_404
#from .models import Post
def post_detail_view(request, slug):
    """Render the detail page for the post identified by ``slug``.

    Args:
        request: the incoming request object, passed through to ``render``.
        slug: value used to look up the post (``Post`` with ``slug=slug``).

    Returns:
        The response produced by rendering ``presidents.html`` with the
        context ``{'post': post}``.

    Raises:
        Http404: if no ``Post`` has the given slug.
    """
    # get_object_or_404 turns a missing slug into a 404 response instead of an
    # unhandled Post.DoesNotExist (which would surface as a server error).
    post = get_object_or_404(Post, slug=slug)
    return render(request, 'presidents.html', {'post': post})
(?=...)
Positive Lookahead - assert that a pattern matches ahead of the current position
(?!...)
Negative Lookahead - assert a pattern does not match in the future
(?<=...)
Positive Lookbehind - assert a pattern that must precede the next expression
(?<!...)
Negative Lookbehind - assert a pattern that must not precede the next expression
[^!@#$%&*()_+]
Match anything except the charset
re.compile('https?://')
will match URL addresses with or without the optional 's'
re.compile('https*://')
may match 'httpssssss://'
<div>.*?</div>
Considers the future
Note: Don't actually parse HTML with regular expressions.
RegExps are a Swiss army knife
Remember that RegEx is a language with commands, loops, and flow control
Using Regular Expressions may be a mistake!
Verbose / Extended Formatting
Resources: