#!/usr/bin/env python
# coding: utf-8

# # Introducing Python
# ## 09/05/2023
#
# <a href="?print-pdf">print view</a><br>
# <a href="pythonintro.ipynb">notebook</a>

# In[1]:


# Slide setup: loads jQuery, RequireJS and asker.js, adds CSS rules for the
# reveal slides, and defines `chartmaker`, the bar-chart callback that the
# quiz cells pass to asker.js as `charter`.
get_ipython().run_cell_magic('html', '', '<script src="https://code.jquery.com/jquery-3.7.1.min.js"></script>\n<script src="https://requirejs.org/docs/release/2.3.6/minified/require.js"></script>\n<script src="https://bits.csb.pitt.edu/asker.js/lib/asker.js"></script>\n<style>\n.reveal pre { font-size: 100%; overflow-x: auto; overflow-y: auto;}\n.reveal h1 { font-size: 2em}\n.reveal ol {display: block;}\n.reveal ul {display: block;}\n.reveal .slides>section>section.present { max-height: 100%; overflow-y: auto;}\n\n.jp-OutputArea-output { padding: 0; }\n</style>\n\n\n<script>\n$3Dmolpromise = new Promise((resolve, reject) => { \n require([\'https://3Dmol.org/build/3Dmol.js\'], function(){ \n resolve();});\n});\nrequire([\'https://cdnjs.cloudflare.com/ajax/libs/Chart.js/2.2.2/Chart.js\'], function(Ch){\n Chart = Ch;\n});\n\n$(\'head\').append(\'<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />\');\n\n\n//the callback is provided a canvas object and data \nvar chartmaker = function(canvas, labels, data) {\n var ctx = $(canvas).get(0).getContext("2d");\n var dataset = {labels: labels, \n datasets:[{\n data: data,\n backgroundColor: "rgba(150,64,150,0.5)",\n fillColor: "rgba(150,64,150,0.8)", \n }]};\n var myBarChart = new Chart(ctx,{type:\'bar\',data:dataset,options:{legend: {display:false},\n scales: {\n yAxes: [{\n ticks: {\n min: 0,\n }\n }]}}});\n};\n\n$(".jp-InputArea .o:contains(html)").closest(\'.jp-InputArea\').hide();\n\n\n</script>\n')


# # Types and Variables
#
# *value* - your data
#
# *type* - what kind of data it is
#
# *variable* - name of your data, how you access it

# # Types
#
# Built In:
# * Numerical: integers, floating point, complex
# * Boolean (True, False)
# * None
# * Sequences and collections: strings, tuples, lists, sets, dictionaries
# * Callable
# (functions)

# # <tt>type</tt>
#

# In[2]:


type(3)


# In[3]:


type(3.0)


# In[4]:


type("Hello")


# In[5]:


type(min)


# # Numbers
#
# ints vs floats

# In[6]:


# asker.js quiz on int vs. float behaviour; the last script line hides the html-magic input areas.
get_ipython().run_cell_magic('html', '', '\n<div id="types" style="width: 500px"></div>\n<script>\n\n\tjQuery(\'#types\').asker({\n\t id: "types",\n\t question: "If <tt>a</tt> and <tt>b</tt> are of type <tt>int</tt> and <tt>x</tt> and <tt>y</tt> are of type <tt>float</tt> which of the following are true?",\n\t\tanswers: ["Speed","Associativity","Both of the above","Neither of the above"],\n extra: ["<tt>a + b</tt> is faster than <tt>x + y</tt>",\n "<tt>(a+b)-b</tt> will always equal <tt>a</tt> but <tt>(x+y)-y</tt> may not equal <tt>x</tt>"\n ],\n server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",\n\t\tcharter: chartmaker})\n$(".jp-InputArea .o:contains(html)").closest(\'.jp-InputArea\').hide();\n\n</script>\n')


# # Arithmetic Operators
#
# `+`, `-`, `*`, `/` Hopefully self-explanatory
#
# `%` modulus (remainder after division)
#
# `**` exponentiation $x^y$ = `x**y`
#
# `//` integer (floor) division **division is different in Python2 vs 3**

# In[ ]:


# Quiz code: predict the result before running.
5+1 * 3/2


# In[7]:


# asker.js quiz for the expression above.
get_ipython().run_cell_magic('html', '', '<div id="aops" style="width: 500px"></div>\n<script>\n\n\tjQuery(\'#aops\').asker({\n\t id: "aops",\n\t question: "What prints out?",\n\t\tanswers: ["6","6.5","7","7.5","9"],\n server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",\n\t\tcharter: chartmaker})\n$(".jp-InputArea .o:contains(html)").closest(\'.jp-InputArea\').hide();\n\n</script>\n')


# In[ ]:


# Quiz code: same expression, but with floor division.
5+1 * 3//2


# In[8]:


# asker.js quiz for the expression above.
get_ipython().run_cell_magic('html', '', '<div id="aops2" style="width: 500px"></div>\n<script>\n\n\tjQuery(\'#aops2\').asker({\n\t id: "aops2",\n\t question: "What prints out?",\n\t\tanswers: ["6","6.5","7","7.5","9"],\n server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",\n\t\tcharter: chartmaker})\n$(".jp-InputArea .o:contains(html)").closest(\'.jp-InputArea\').hide();\n\n</script>\n')


# # Assignment Operators
#
# Can perform an operation while assigning
#
# a *op*= b
#
# is
#
# a = a *op* b

# In[9]:


x = 10
x += 1
x


# # Strings
#
# strings are a sequence of characters
#
# ### String literals
# * "you can use double quotes"
# * 'you can use single quotes (more common python style)'
# * "the difference is how easy it is to include a ' character" 'or a " character'
# * special characters are _escaped_ with a backslash
#   * so must always escape backslash itself
#   * '\n' newline
#   * '\\\\' backslash
#   * '\t' tab
#   * '\'' single quote
#

# # Multiline string literals

# In[10]:


"you can end a line with a slash\
and it will continue on the next line"


# Adjacent string literals are automatically concatenated

# In[11]:


'hi ' 'bye'


# Triple quoted strings - for large blocks of text with newlines, commonly used as documentation:

# In[12]:


'''There are three quotes at the start and end'''


# In[ ]:


# Quiz code: predict the output before running.
print('"\\t'"'")


# In[13]:


# asker.js quiz for the print statement above.
get_ipython().run_cell_magic('html', '', '<div id="strq" style="width: 500px"></div>\n<script>\n\n var divid = \'#strq\';\n\tjQuery(divid).asker({\n\t id: divid,\n\t question: "What prints out?",\n\t\tanswers: ["\\ \'\\"",\'"\\ t"\',\n "\\"\\\\t\'\\"\'",\'"\\\\t\\\'\',"Error"],\n server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",\n\t\tcharter: chartmaker})\n\n$(".jp-InputArea .o:contains(html)").closest(\'.jp-InputArea\').hide();\n\n</script>\n')


# # Variables
#
# Data values are accessed through _references_ to the value.
#
# A reference is a name for the memory location of the value.
# Every value exists somewhere in memory and has an address.
#
# A variable is created when it is _bound_ to a value. It is impossible to have an uninitialized variable in python (but it can be bound to None).
#
# The type of a variable is the type of the value it is bound to

# In[ ]:


# Quiz code: predict the output before running.
x = 3
y = x
y = y + 1
print(x,y)


# In[14]:


# asker.js quiz for the cell above; the last script line hides the html-magic input areas.
get_ipython().run_cell_magic('html', '', '<div id="varsq" style="width: 500px"></div>\n<script>\n$(\'head\').append(\'<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />\');\n\n var divid = \'#varsq\';\n\tjQuery(divid).asker({\n\t id: divid,\n\t question: "What prints out?",\n\t\tanswers: ["3 3","3 4","4 3","4 4"],\n server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",\n\t\tcharter: chartmaker})\n$(".jp-InputArea .o:contains(html)").closest(\'.jp-InputArea\').hide();\n\n</script>\n')


# # Objects
#
# Everything is an object!
#
# An object is a value with a set of _attributes_
#
# Attributes that are callable are called _methods_ and can work on the object data
#
# Attributes are accessed with the '.' operator
#
# The attributes of an object can be listed with <tt>dir</tt>

# # Strings are Objects
#
# A string is an object that has several methods for manipulating the string data.
#

# In[15]:


s = 'Hello World'
print(s.upper())
print(s.split())


# <a href="https://docs.python.org/3/library/stdtypes.html#string-methods">String Methods Documentation</a>

# In[16]:


print(dir(s))


# # Numbers are Objects
#
# Since everything is an object...
# In[17]:


x = 3.0


# In[18]:


x.is_integer()


# In[19]:


print(dir(x))


# # Container Objects
#
# A container object has _items_ that are accessed with the <tt>[]</tt> operator
#
# They hold an arbitrary number of item objects
#
# The <tt>len</tt> function returns the number of items
#
# Strings are an example of a container object

# In[20]:


s = "Hello"
len(s)


# In[ ]:


# Quiz setup: the question below is about indexing into s.
s = "Hello"


# In[21]:


# asker.js quiz about s; the last script line hides the html-magic input areas.
get_ipython().run_cell_magic('html', '', '<div id="sindex" style="width: 500px"></div>\n<script>\n$(\'head\').append(\'<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />\');\n\n var divid = \'#sindex\';\n\tjQuery(divid).asker({\n\t id: divid,\n\t question: "What is the value of s[1]?",\n\t\tanswers: ["Hello","H","e","o","None"],\n server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",\n\t\tcharter: chartmaker})\n$(".jp-InputArea .o:contains(html)").closest(\'.jp-InputArea\').hide();\n\n</script>\n')


# <img src="">

# # Lists
#
# A list is an ordered container of arbitrary objects.
#
# A list is defined by comma separated items in square brackets `[]`.

# In[22]:


mylist = [1,3.0,"cat", 9+2]
print (mylist)


# In[23]:


mylist[0]


# Lists are objects and have a number of built-in methods:

# In[24]:


print(dir(mylist))


# In[ ]:


# Quiz code: predict the final value of l before running.
l = []
l.append(5)
l.append(1)
l.append(3)
l.sort()


# In[25]:


# asker.js quiz for the cell above.
get_ipython().run_cell_magic('html', '', '<div id="listq" style="width: 500px"></div>\n<script>\n\n var divid = \'#listq\';\n\tjQuery(divid).asker({\n\t id: divid,\n\t question: "What is the value of l?",\n\t\tanswers: ["3","[3]","[5,1,3]","[1,3,5]","[3,1,5]"],\n server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",\n\t\tcharter: chartmaker})\n$(".jp-InputArea .o:contains(html)").closest(\'.jp-InputArea\').hide();\n\n</script>\n')


# # Functions (callables)
#
#
# A function is an encapsulation of code; a set of statements that can
# be executed on request and return a value.
#
# Functions are objects.
#
# A method is a function that is an attribute of an object.
#
# A function takes _arguments_ and returns a result (maybe None)
#
# The value of a callable type is the address of executable code

# In[ ]:


len("Luke, I am your father.") # this function takes one argument
divmod(13,4); # this function takes two arguments


# # Defining a function

# In[ ]:


def square(x):
    """Return x multiplied by itself."""
    return x * x


# - <tt>def</tt> starts definition
# - The function name is an identifier like a variable name
#   - good function names are a critical part of good coding style
#     - bad: foo, dostuff, process_data
#     - also bad: ReadInFromFileComputeCorrelationAndOutput
#     - good: ReadExpressionData, ComputeCorrelation, OutputCorrelationMatrix
# - _Parameters_ define what arguments the function takes
#   - parameters are _bound_ to the values passed to the function
# - Statements are the body of the function; **must be indented**
# - Return statement exits function and returns specified value
#   - if omitted, <tt>None</tt> is returned
# - Function definition ends when no more indentation (**whitespace significant!**)
#

# In[ ]:


# Quiz code: predict the output before running.
def twice(x):
    """Return x times two."""
    return x*2

dbl = twice #functions are objects
print(dbl(4))


# In[26]:


# asker.js quiz for the cell above; the last script line hides the html-magic input areas.
get_ipython().run_cell_magic('html', '', '<div id="dbl" style="width: 500px"></div>\n<script>\n$(\'head\').append(\'<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />\');\n\n var divid = \'#dbl\';\n\tjQuery(divid).asker({\n\t id: divid,\n\t question: "What prints out?",\n\t\tanswers: ["8","4","0","<function twice at 0x11115ea28>","None"],\n server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",\n\t\tcharter: chartmaker})\n$(".jp-InputArea .o:contains(html)").closest(\'.jp-InputArea\').hide();\n\n</script>\n')


# # Function Scope
#
# A function's parameters are bound to the passed value.
# That is, it's the same as if the parameter was set equal to the passed value (ex, x=4).
#
# Parameters and variables bound (assigned to) in the function have _local scope_
#
# _global_ variables defined outside the function can only be read (unless declared with the `global` statement).
#

# In[27]:


# Quiz code: predict the output before running.
x=4
y=3
def incr(x):
    x = x + 1
    return x

print(x,incr(x),x)


# In[28]:


# asker.js quiz for the cell above; the last script line hides the html-magic input areas.
get_ipython().run_cell_magic('html', '', '<div id="func1" style="width: 500px"></div>\n<script>\n$(\'head\').append(\'<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />\');\n\n var divid = \'#func1\';\n\tjQuery(divid).asker({\n\t id: divid,\n\t question: "What prints out?",\n\t\tanswers: ["4 4 4","4 5 5","4 5 4","4 5 6","Error"],\n server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",\n\t\tcharter: chartmaker})\n$(".jp-InputArea .o:contains(html)").closest(\'.jp-InputArea\').hide();\n\n</script>\n')


# In[ ]:


# Quiz code: this version of incr takes no parameter and uses y instead.
# Predict the output before running.
x=4
y=3
def incr():
    y = y + 1
    return y

print(y,incr())


# In[29]:


# asker.js quiz for the cell above.
get_ipython().run_cell_magic('html', '', '<div id="func2" style="width: 500px"></div>\n<script>\n$(\'head\').append(\'<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />\');\n\n var divid = \'#func2\';\n\tjQuery(divid).asker({\n\t id: divid,\n\t question: "What prints out?",\n\t\tanswers: ["3 3","3 4","4 4","Error"],\n server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",\n\t\tcharter: chartmaker})\n$(".jp-InputArea .o:contains(html)").closest(\'.jp-InputArea\').hide();\n \n</script>\n')


# Default Parameters
# ------------------
# Default values for parameters can be given. This makes it easy to have
# optional arguments that take reasonable defaults if not specified.

# In[30]:


def foo(x,y=0,z=1):
    """Return (x + y) * z; y defaults to 0 and z defaults to 1."""
    return (x+y)*z

foo(2)


# In[31]:


foo(2,3)


# In[32]:


foo(2,3,4)


# Calling Functions
# -----------------
# Functions are called using parens <tt>()</tt>.
#
# It is an error to call a function
# with an incompatible number of arguments.
#
# _Named_ arguments allow you to specify arguments in a different order
# than defined.
#
# Unnamed arguments (passed in the order defined) must all be
# specified before any named arguments.

# In[33]:


foo(z=2,y=1,x=3)


# In[34]:


foo(y=1,x=3)


# In[ ]:


# Quiz setup: same definition as before; the question below is about calling it.
def foo(x,y=0,z=1):
    """Return (x + y) * z; y defaults to 0 and z defaults to 1."""
    return (x+y)*z


# In[35]:


# asker.js quiz about calling foo; the last script line hides the html-magic input areas.
get_ipython().run_cell_magic('html', '', '<div id="func3" style="width: 500px"></div>\n<script>\n$(\'head\').append(\'<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />\');\n\n var divid = \'#func3\';\n\tjQuery(divid).asker({\n\t id: divid,\n\t question: "What does <tt>foo(z=2,4)</tt> return?",\n\t\tanswers: ["8","4","5","10","An Error"],\n server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",\n\t\tcharter: chartmaker})\n$(".jp-InputArea .o:contains(html)").closest(\'.jp-InputArea\').hide();\n \n</script>\n')


# Lambda Functions
# ----------------
# The <tt>lambda</tt> keyword can be used to generate an anonymous (unnamed) function
# object.
#
# The return value of this anonymous <tt>lambda</tt> function is the value of the specified expression.
#
# Lambda expressions tend to make your code more compact but less readable,
# so it's probably best to avoid their use for all but the simplest functions.

# In[ ]:


# Deliberately binds a lambda to a name so it can be compared with the def form below.
foo = lambda x: x**2 #this squares x


# is equivalent to

# In[ ]:


def foo(x):
    """Return x squared."""
    return x**2


# Built-in Functions
# ------------------
# There are a huge number of functions built into the python language and
# even more are included in standard modules.
# A few examples:
#
# * `abs` - absolute value
# * `len` - length of a sequence (string, list, etc)
# * `min`,`max` - returns smallest/largest item in a sequence
# * `sorted` - given a list, returns a sorted copy of that list
# * `type` - returns type of an object
# * `map` - applies a function to every element of a sequence and returns an
# iterator over the results (wrap it in `list` to get a list)
# * `filter` - applies a function to every element of a sequence and returns an
# iterator over just those elements where the function evaluates to true
# (wrap it in `list` to get a list)

# In[36]:


list(map(ord, "hello")) #ord returns the Unicode code point (the ASCII code for ASCII characters) of a string of length 1


# In[37]:


list(filter(lambda x: x > 0, [1.0,0.4,-0.3,-1.3,4]))


# # What is truth?
#
# Every object has a Boolean value

# In[38]:


bool(None),bool(False),bool(True)


# For numerical types 0 is false

# In[39]:


bool(0),bool(0.0),bool(-100)


# Empty collections are false

# In[40]:


bool([]),bool(''),bool([False]),bool([0])


# In[41]:


# asker.js quiz on truthiness; the last script line hides the html-magic input areas.
get_ipython().run_cell_magic('html', '', '<div id="falseisfalse" style="width: 500px"></div>\n<script>\n$(\'head\').append(\'<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />\');\n\n var divid = \'#falseisfalse\';\n\tjQuery(divid).asker({\n\t id: divid,\n\t question: "What is the value of bool(\'false\')?",\n\t\tanswers: ["True","False","None","Error"],\n server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",\n\t\tcharter: chartmaker})\n \n$(".jp-InputArea .o:contains(html)").closest(\'.jp-InputArea\').hide();\n\n</script>\n')


# # Comparison Operators
#
# Return a boolean value
#
# `<,>,!=,==,<=,>=`

# In[42]:


1 < 3


# In[43]:


"hello" != "hi"


# In[44]:


[1,2,3] == [1,2,3], [1,2,3] == [1,2,3.14]


# In[45]:


x = 3; y = 4; x >= y


# In[ ]:


# Quiz code: predict the result before running.
list(filter(round, [1.0,0.4,-0.3,-1.3,4]))


# In[46]:


# asker.js quiz for the filter call above.
get_ipython().run_cell_magic('html', '', '<div id="filterlam" style="width: 500px"></div>\n<script>\n$(\'head\').append(\'<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />\');\n\n var divid = \'#filterlam\';\n\tjQuery(divid).asker({\n\t id: divid,\n\t question: "What is the result of this application of filter?",\n\t\tanswers: ["[1, 0, 0, -1, 4]","[1.0, 0.4, -0.3, -1.3, 4]","[1.0, -1.3, 4]","[1,-1,4]","[]","An Error"],\n server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",\n\t\tcharter: chartmaker})\n$(".jp-InputArea .o:contains(html)").closest(\'.jp-InputArea\').hide();\n \n</script>\n')


# # Introspection
#
# `dir` and `help` can be used to figure out what methods an object has and what they do.

# In[47]:


help(round)


# File Objects
# ------------
# A file object provides an interface for reading and writing files.
#
# Files, unlike memory, are **accessed sequentially** (like reading a book).
#
# To create a file object use the `open` function:
#
# `fileobject = open(filename, mode)`
#
# Where filename is a string that is either a relative path from the
# current working directory (e.g., file.txt if file.txt is in the current
# directory) or an absolute path (e.g. `/home/user/dkoes/tmp/file.txt`).

# File Mode
# ----------
#
# mode is a string that specifies what you are going to do with the file.
# * 'r' - file must already exist and will only be read from (default)
# * 'w' - file is created or truncated (delete what's already there) and can only be written to
# * 'a' - file is opened for appending (does not delete existing contents) and can only be written to
#
# It is also possible to open files for both read/write access ('r+') but this
# is tricky and generally not necessary.
#
# Manipulating File Objects (Methods)
# ------------------------
# * `close` - closes the file when you are done with it
# * `read` - return the entire file as a string (can also specify optional size argument)
# * `readline` - return a single line from the file, returned string includes '\n'
# * `readlines` - return a list of all lines
# * `write` - writes a passed string to the file
# * `seek` - set current position of the file; seek(0) starts back at beginning
#
#

# In[48]:


# The handle is left open so that the readline() result stays the cell's last expression.
f = open('../files/brca1.fasta')
f.readline()


# In[ ]:


# Quiz code: predict the value of line before running.
f = open('../files/brca1.fasta')
f.read()
line = f.readline()
f.close()  # done with the file; release the handle


# In[49]:


# asker.js quiz for the cell above; the last script line hides the html-magic input areas.
get_ipython().run_cell_magic('html', '', '<div id="filer" style="width: 500px"></div>\n<script>\n$(\'head\').append(\'<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />\');\n\n var divid = \'#filer\';\n\tjQuery(divid).asker({\n\t id: divid,\n\t question: "What is the value of line?",\n\t\tanswers: ["The first line in the file","The second line in the file",\n "The last line in the file","An empty line","An error is generated"],\n server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",\n\t\tcharter: chartmaker})\n \n$(".jp-InputArea .o:contains(html)").closest(\'.jp-InputArea\').hide();\n\n</script>\n')


# # Exercise
#
# What percent of this string consists of g or c?
#
# atattaggtttttacctacccaggaaaagccaaccaacctcgatctcttgtagatctgttctctaaacgaactttaaaatctgtgtagctgtcgctcggctgcatgcctagtgcacctac
#
# Create a `.py` text file using Jupyter.
#

# # Exercise

# In[ ]:


# Fetches brca1.fasta with wget for the exercise below.
get_ipython().system('wget https://MSCBIO2025.github.io/files/brca1.fasta')


# How can you extract the gene name (second column) from the first line of `brca1.fasta`?
#
# How many As, Ts, Cs, and Gs are there on the _second_ line of `brca1.fasta`?
#
# Write a function that takes a file name as an argument and prints out the gene name and percentage of G's and C's in the first line of the sequence.
#
# **Hint**: Check out the `split`, `count`, and `strip` methods of `str`