source: tcl/turl-procs.tcl @ 1

Last change on this file since 1 was 1, checked in by mek, 8 years ago

initial import

File size: 19.2 KB
Line 
1#Turl, A tinyurl.com clone
2#Copyright (C) 2003  Mat Kovach (mkovach@alal.com)
3#
4#This program is free software; you can redistribute it and/or modify
5#it under the terms of the GNU General Public License as published by
6#the Free Software Foundation; either version 2 of the License, or
7#(at your option) any later version.
8#
9#This program is distributed in the hope that it will be useful,
10#but WITHOUT ANY WARRANTY; without even the implied warranty of
11#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12#GNU General Public License for more details.
13#
14#You should have received a copy of the GNU General Public License
15#along with this program; if not, write to the Free Software
16#Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
18ns_log Notice "Loading turl procs"
19
20proc base62_func {} {
21   # quick hack for our base 62 ids
22   return [list  0 1 2 3 4 5 6 7 8 9 a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z]
23}
24
25proc get_base62 { n } {
26   # convert a 0-61 number to our base62 id
27   set base62 [base62_func]
28   return [lsearch -exact $base62 $n]
29}
30
31proc print_base62 { n } {
32   # print the index of the base62 id 0 -> 0, a -> 10, etc.
33   set base62 [base62_func]
34   return [lindex $base62 $n]
35}
36
37proc decode { num } {
38   # given a id, decode to an integer
39   # first make sure this is valid
40   set length [max_encode_length]
41   # make sure the id is correctly formated
42   if { ![regexp {([0-9a-zA-Z]+)} $num] } {
43        return -1
44   }
45   # Should really need this, but you never know
46   if { [string length $num] != $length } {
47        return -1
48   }
49   # This is the function that takes the id and creates a interger id
50   # which we use in the database
51   set max [expr $length - 1 ]
52   set output 0
53   foreach c [split $num {}] {
54      set output [expr $output + [expr [get_base62 $c] * pow(62,$max)]]
55      incr max -1
56   }
57   # return the id, as an integer
58   return [expr int($output)]
59}
60
61proc convert { m } { 
62   # given and interger, convert to an ID.
63   if { $m < 62 } {
64      return [print_base62 $m]
65   } else {
66      set line "[convert [expr $m / 62]][print_base62 [expr $m % 62]]" 
67      return $line
68   }
69}
70
71proc encode { num } {
72   # convert a number to and ID and zero pad it
73   # FIXME: we should throw and error if the id is longer than max_length
74   set tmp [convert $num]
75   set diff [expr [max_encode_length] - [string length $tmp]]
76   for {set i 0} { $i < $diff } {incr i} {
77      set tmp "0$tmp"
78   }
79   return $tmp
80}
81
82proc get_page { conn } {
83 
84   # this is the request processor
85   
86   # urls as a list
87   set url_list      [ns_conn urlv $conn]
88
89   # number of urls
90   set url_list_size [ns_conn urlc $conn]
91
92   # get our offset
93   set offset        [turl_url_offset]
94
95   # Index to start looking for requests
96   set index [expr $url_list_size - $offset ]
97
98   if { $index > 0 } {
99      # we have /xxxxx which may or may not be a redirect
100      set monitor_id [ decode  [ lindex $url_list $offset ] ] 
101      # if the page does decode it is a redirect attempt
102      if { $monitor_id != -1 } {
103         # Okay, lets see if it is a valid redirect
104         set db [ns_db gethandle]
105         set url [database_to_tcl_string_or_null $db "select url from turl where monitor_id=$monitor_id and enabled='t'"]
106         if {[empty_string_p $url]} {
107            # not a valid redirect, so we'll record an error
108            ns_log Notice "Error, id $monitor_id not valid"
109         } else {
110            # valid redirect, lets record some info and log it
111            set remote_addr [ns_conn peeraddr]
112            set referrer  [get_referrer]
113            set sql "insert into turl_click_log
114(monitor_id,remote_addr,click_time,referrer)
115values
116($monitor_id,'[DoubleApos $remote_addr]',now(),'[DoubleApos $referrer]')"
117            if [catch {ns_db dml $db $sql} errmsg] {
118               ns_log Error "Could not log click for monitor $monitor_id, from $remote_addr/$referrer"
119            }
120
121            # lets see if there is any extra information to include in the
122            # redirect
123
124            # first log for extra url info
125            set rest [lindex $url_list [expr $offset + 1] end ]
126            if { ![empty_string_p $rest] } {
127                append url "/$rest"
128            }
129
130            # now lets add any query information
131            set query [ns_conn query $conn]
132            if {![empty_string_p $query]} {
133               append url "?$query"
134            }
135
136            # do the redirection
137            ns_log Notice "redirecting to $url"
138            ns_returnredirect $url
139
140            # close the connection
141            catch {ns_conn close $conn}
142
143            # return
144            return -1
145         }
146      } 
147   }
148   
149   # if we get this far, we are not redirecting, therefore lets
150   # try to server the page
151
152   # check, this might be the index
153   if { [empty_string_p [lindex $url_list [turl_url_offset]]] } {
154      # okay, set the url to /index
155      set url "/index"
156   } else {
157      # get the url for the conenction info
158      set url [ns_conn url $conn]
159   }
160
161   # add page_ in front of the url
162   # with changes in the processor, this shouldn't be needed anymore
163   # but this has not been tested ye.
164   return "page_$url"
165}
166
167proc turl {conn ignored} {
168
169  # lets get the page
170
171  set page [get_page $conn]
172 
173  # if return was -1, they a redirection happened and we don't need to
174  # do anything.  This proc needs to be redone a bit.
175   
176  if { $page != -1 } {
177     
178      # pull the page info out and any extention
179      if { [regexp {^page_([^\.]+)(\.)?(.+)?} $page dummy file dot ext]} {
180         
181          # setup the files we might be looking for
182          set pagefile "[doc_root]${file}"
183         
184          # If we have an empty extention, add extentions to it
185          if {[empty_string_p $ext]} {
186
187              # first check if the page does not exists, if it does
188              # server that file
189              if {![valid_pagefile_p $pagefile]} {
190
191                  # okay, lets add extentions, look for a good file
192                  foreach page_ext [turl_page_ext] {
193                      if {[valid_pagefile_p "${pagefile}.${page_ext}"]} {
194
195                          # we found a valid page, update pagefile and
196                          # stop processing
197                          append pagefile ".$page_ext"
198                          break
199                      }
200                  }
201              } else {
202                  # Okay, it is not a valid page request, lets check for a directory.
203                  if { [string compare [string range [ns_conn url $conn] end end] /] != 0 } {
204                      # if we don't have a valid page but the url does not
205                      # END in a /, somebody might have been typing a dir
206                      # well try redirecting to that directory.
207                      set url "[turl_system_url][ns_conn url $conn]/"
208                      ns_returnredirect $url
209                      return 0
210                  }
211
212                  # if ext is empty, but is a valid maybe it is a directory?
213                  if { [file isdirectory $pagefile] } {
214
215                      # should we just so a directory listing? Assume yes
216                      set show_list_p 1
217
218                      # look for directory files we might be able to use
219                      # ie: index.html, index.adp, etc.
220                      foreach server_ext [get_directory_files] {
221                          if { [valid_pagefile_p "${pagefile}/$server_ext"] } {
222
223                              # Hey, we found one, update the pagefile and
224                              # stop processing
225                              append pagefile "/$server_ext"
226                              set show_list_p 0
227                              break
228                          } 
229                      }
230
231                      # Do we still show the directory listing?
232                      if { $show_list_p } { 
233
234                          # not index.adp or index.html, show a directory
235                          # listing and return
236                          ns_return 200 text/html "[turl_header [turl_system_name]]\n[directory_listing $pagefile]\n<p>\n[turl_footer]\n"
237                          catch {ns_close $conn}
238                          return 0
239                      }
240                  } else {
241
242                      # nothing to do, file not found
243                      ns_returnnotfound
244                      return
245                  }
246              } 
247          } else {
248              # Okay, lets actually just server the page and the extension
249              append pagefile ".${ext}"
250          }
251
252          # All that work, does the page even exist?
253          if {![valid_pagefile_p $pagefile]} {
254           
255              # Nope, blah!
256              ns_returnnotfound
257              return
258          }
259         
260          # Lets see if we can guess the file type, text/html, etc.
261          set type [ns_guesstype $pagefile]
262         
263          # is this a file we parse using the adp_parser?
264          if {[string match "*[ns_config ns/server/[ns_info server]/adp map]" $pagefile]} {
265
266              # yes, lets try to parse the page
267              set page [catch {ns_adp_parse -file $pagefile} error]
268              if {$page} {
269
270                  # this is atually be the error
271                  ns_return 200 text/html $error
272              } else {
273                   
274                  # Everying okay, lets return the page
275                  ns_return 200 text/html $error
276              }         
277          } else {
278              # Nope, this is not parsed, just server the file
279              switch $type {
280                  "*/*" {
281                  }
282                  default {
283                      ns_returnfile 200 $type $pagefile
284                  }
285              }
286          }
287      }
288  } else {
289
290      # Nothing found, let the user no.
291      ns_returnnotfound
292  }
293}
294
295proc get_directory_files {} {
296   # get the directory files from the server config
297   return [split [ns_config "ns/server/[ns_info server]" directoryfile] ,]
298}
299
300proc valid_pagefile_p { pagefile } {
301    # take a file and look for it on the system
302    if {[catch {set fp [open $pagefile]}]} {
303         return 0
304      } else {
305         return 1
306      }
307}
308
309proc directory_listing { dir } {
310    # taken directlry from OpenACS's reqeuest process
311    # ad_proc -private rp_html_directory_listing
312    set list "
313<table>
314<tr align=left><th>File</th><th>Size</th><th>Date</th></tr>
315<tr align=left><td colspan=3><a href=../>..</a></td></tr>
316"
317
318    # Loop through the files, adding a row to the table for each.
319    foreach file [lsort [glob -nocomplain $dir/*]] {
320        set tail [file tail $file]
321        set link "<a href=$tail>$tail</a>"
322
323        # Build the stat array containing information about the file.
324        file stat $file stat
325        set size [expr $stat(size) / 1000 + 1]K
326        set mtime $stat(mtime)
327        set time [clock format $mtime -format "%d-%h-%Y %H:%M"]
328
329        # Write out the row.
330        append list "<tr align=left><td>$link</td><td>$size</td><td>$time</td></tr>\n"
331    }
332    append list "</table>"
333    return $list
334}
335
336proc valid_page_p { url } {
337    # even a url, see if it works
338    if {[util_link_responding_p $url]} {
339
340        # if it is working, get the status
341        if { [ catch {set status [util_get_http_status $url] } ] } {
342            set status "500"
343        }
344
345    } else {
346       
347        # Borked
348        set status "500"
349    }
350
351    # if there was a bad status, return 0
352    if {$status == 500 || $status == 404} {
353        return 0
354    } else {
355
356        # every seems okay
357        return 1
358    }
359}
360
361proc get_title { url } {
362    # Attempt to get the title of the web page
363    if { [catch {set page [ns_httpget $url]}] } {
364        set title ""
365    } else {
366        if {![regexp {<title>(.*)?</title>} $page match title]} {
367           set title ""
368        }
369    }
370    return $title
371}
372
373proc print_turl_link { monitor_id title } {
374    # prints a link for the turl
375    return "<a href=\"[turl_system_url]/[encode $monitor_id]\"> $title - [turl_system_url]/[encode $monitor_id]</a><p><code>[turl_system_url]/[encode $monitor_id]</code><br>"
376}
377
378proc turl_link_to_clipboard { monitor_id } {
379    # add some stuff to try and copy the turl link to the clip board
380    return "<form>
381<input type=hidden name=turl value=\"[turl_system_url]/[encode $monitor_id]\">
382</form>
383<script>
384x = document.all.turl.createTextRange();
385x.execCommand(\"Copy\");
386</script>
387</p>"
388}
389
390proc url_count { db url } {
391    # how many times does url show up in the database (should be 0 or 1)
392    return [database_to_tcl_string $db "select count(*) from turl where url='$url'"]
393}
394
395proc get_monitor_id_from_url { db url } {
396    # give an url, get the monitor id
397    return [database_to_tcl_string $db "select monitor_id from turl where url='$url'"]
398}
399
400proc get_monitor_id { db } {
401    # get the next monitor id sequence
402    return [database_to_tcl_string $db "select nextval('turl_monitor_id_seq')"]
403}
404
405proc add_url { url } {
406
407    # trying to add a url to the database
408
409    # this regexp is for Brent Welch's Pratical Programming in TCL/Tk
410    if {![regexp {([^:]+)://([^:/]+)(:([0-9]+))?(/.*)?} $url match protocol host x serverport path]} {
411        return "<b>You entered a bad url, make sure you have started the url with http://</b>"
412    } else {
413       
414        # we got a url from that mess, let see if it works.
415        if {![valid_page_p $url]} {
416            # Nope, let the user know
417            return "Error getting url<P>$url"
418        } else {
419
420            # we have a valid url, time to add it to the database
421            ns_log Notice "url to add: $url"
422
423            # Lets get the title
424            set title [get_title $url]
425
426            # Open up a database handler
427            set db [ns_db gethandle]
428
429            # Lets prepare the url to be database friendly
430            set QQurl [DoubleApos $url]
431
432            # Check the database to see if the url is already in the database
433            if { [url_count $db $QQurl] > 0 } {
434
435                # yep, somebody did it first, lets just update the database
436                # showing that the link works
437                set monitor_id [get_monitor_id_from_url $db $QQurl]
438                set sql "update turl set checked_date = now(),working_date = now(),enabled='t' where monitor_id = $monitor_id"
439                catch {ns_db dml $db $sql}
440            } else {
441               
442                # Hey, it is a new url
443
444                # get the next id
445                set monitor_id [get_monitor_id $db]
446
447                # this is just a hack to allow the admin to load the database
448                # out of sequence and not cause errors
449                while { [url_count $db $QQurl] > 0 } {
450                    set monitor_id [get_monitor_id $db]
451                }
452
453                # lets stuff it into the database
454                set sql "insert into turl (monitor_id,url,enabled,title,entered_date,checked_date,working_date) values ($monitor_id,'$QQurl','t','[DoubleApos $title]',now(),now(),now())"
455                if { [catch {ns_db dml $db $sql} errmsg] } {
456
457                    # Opps database error.
458                    return "Database error"
459                }
460            }
461
462            # return the link, the user won't know if the url was previously
463            # entered or not.
464            return "[print_turl_link $monitor_id $title]<p>[ turl_link_to_clipboard $monitor_id]"
465        }
466    }
467}
468
469proc list_of_urls { } {
470   
471    # Display a list of urls
472    set db [ns_db gethandle]
473    set urls ""
474    set selection [ns_db select $db "select monitor_id,url,title from turl order by monitor_id"]
475    while { [ns_db getrow $db $selection] } {
476        set_variables_after_query
477        set turl "[turl_system_url]/[encode $monitor_id]"
478        set link "<a href=\"$turl\">$turl</a>"
479        if {![empty_string_p $title]} { 
480            append urls "<li>$title - $link</li>\n"
481        } else {
482            append urls "<li>$link</li>\n"
483        }
484    }
485    return $urls
486}
487
488proc click_count { {limit ""} } {
489   
490    # get the click count for links using limit to limit the amount of
491    # urls returned
492    set db [ns_db gethandle]
493    set output ""
494    set sql "select t.monitor_id as monitor_id, t.title as title, count(*) as clicks from turl as t, turl_click_log as c
495where c.monitor_id = t.monitor_id
496group by t.monitor_id,t.title
497order by clicks desc"
498    if {![empty_string_p $limit] } {
499         append sql " limit $limit"
500    }
501    set selection [ns_db select $db $sql]
502    while { [ns_db getrow $db $selection] } { 
503        set_variables_after_query
504        set turl "[turl_system_url]/[encode $monitor_id]"
505        set link "<a href=\"$turl\">$turl</a>"
506        append output "<tr><td>$link</td><td><a href=\"/clicks-for-one?turl_id=[encode $monitor_id]\">$title</a></td><td align=\"center\">$clicks</td></tr>\n"
507    }
508    return $output
509}
510
511proc clicks_for_one_url { monitor_id } {
512
513    # display the clicks for, well, one url
514    set db [ns_db gethandle]
515    set output ""
516    append output "<B>[get_url_info $db $monitor_id ]</B><hr align=\"center\" width=\"95%\">\n"
517    append output "<table width=\"99%\" align=\"center\">\n"
518    append output "<tr align=\"center\"><td>Remote Address</td><td>Referrer</td><td>Clicks</td></tr>\n"
519    set sql "select remote_addr as addr ,referrer,count(*) as clicks
520from turl_click_log
521where monitor_id = $monitor_id
522group by referrer,addr
523order by clicks desc"
524    set selection [ns_db select $db $sql]
525    while { [ns_db getrow $db $selection] } {
526        set_variables_after_query
527        append output "<tr align=\"center\"><td>$addr</td><td>$referrer</td><td>$clicks</td></tr>\n"
528    }
529    append output "</table>\n"
530    return $output
531}
532 
533proc get_url_info { db monitor_id } {
534   
535     # get the information about a url
536     set output "" 
537     set selection [ns_db 0or1row $db "select title,url,entered_date from turl where monitor_id = $monitor_id"]
538     set_variables_after_query
539     if {![empty_string_p $title]} {
540         append output "$title -"
541     }
542     append output "$url<br>entered:$entered_date"
543     return $output
544}
545
546proc check_urls {} {
547
548    # This checks the urls in the database (making sure they still work)
549    set db [ns_db gethandle]
550    set monitor_ids [database_to_tcl_list $db "select monitor_id from turl"]
551    set n_urls [llength $monitor_ids]
552    set start_time [ns_time]
553    if { $n_urls == 0 } {
554        ns_log Notice "Found no urls to monitor"
555    } else {
556        ns_log Notice "Starting to check $n_urls urls"
557        foreach monitor_id $monitor_ids {
558            set selection [ns_db 0or1row $db "select url,date_part('day',now() - working_date) as non_working_days from turl where monitor_id = $monitor_id "]
559            if { $selection ==""} {
560                #this should never happen
561                continue
562            }
563            set_variables_after_query
564            # update the database, that the url is being checked
565            set sql "update turl set checked_date = now() where monitor_id = $monitor_id"
566            if { [catch {ns_db dml $db $sql} errmsg] } {
567                ns_log Error "Database error: $errmsg"
568            }
569
570            # lets see if it works
571            if {[valid_page_p $url]} {
572
573                # if it does, update the database
574                set sql "update turl set working_date = now() where monitor_id = $monitor_id" 
575                if { [catch {ns_db dml $db $sql} errmsg] } {
576                    ns_log Error "Database error: $errmsg"
577                }
578            } else {
579
580                # it stopped working
581                if {$non_working_days > [disable_days]} {
582                    set sql "update turl set enabled = 'f' where monitor_id = $monitor_id"
583                    if { [catch {ns_db dml $db $sql} errmsg] } {
584                        ns_log Error "Database error: $errmsg"
585                    }
586                } 
587            }
588        }
589    }
590    ns_log Notice "Took [expr [ns_time] - $start_time] seconds to check $n_urls urls"
591}
592 
593ns_log Notice "Done loading turl procs"
594
595# register our url handler
596ns_register_proc GET  [turl_url] turl
597ns_register_proc POST [turl_url] turl
598ns_register_proc HEAD [turl_url] turl
599
600ns_log Notice "Registred handled for [turl_url]"
601
602# schedule our nightly checking
603ns_share -init {set schedule_check_urls 0} schedule_check_urls
604if {!$schedule_check_urls} {
605    ns_schedule_daily 0 0 check_urls
606    ns_log Notice "URL check has been scheduled."
607}
Note: See TracBrowser for help on using the repository browser.