KDD 2009 Annotated Abstracts Notes

From GM-RKB
Jump to navigation Jump to search

See: KDD-2009 Annotated Abstracts.


extractCvineInfo.sh

#!/bin/bash
# extractCvineInfo.sh
# ./extractCvineInfo.sh >& cvine.tab
#
# Scrapes the saved talk pages named 4??? and 5??? in the parent directory
# and prints one tab-separated record per page on stdout:
#
#   file <TAB> track <TAB> track group <TAB> authors <TAB> title <TAB> abstract
#
# Requires: grep, sed, awk, cut, head, tail, wc, perl.
# No "set -e": a page that lacks one of the markers must still yield a record
# (with empty fields) instead of aborting the whole run.

#######################################
# Print the tab-separated record for one saved talk page.
# Arguments:
#   $1 - path to the page file
# Outputs:
#   One line on stdout; fields whose markers are missing are left empty.
#######################################
extract_record() {
  local fileLoc=$1
  local file=${fileLoc##*/}
  local lines lineTrack Track trackG title
  local lineAuthors authLines authors=
  local lineAbstract1 lineAbstract1r lineAbstract2_ abstract=

  lines=$(wc -l < "$fileLoc")

  # GET THE TALK TYPE {S, P, W, D, T}
  # Only the first ">Track:<" line is used, so the line number below is
  # always a single value that is safe to do arithmetic on.
  lineTrack=$(grep -n ">Track:<" "$fileLoc" | head -n 1 | cut -d: -f1)
  Track=$(grep ">Track:<" "$fileLoc" | head -n 1 | awk '{print $2}' | sed "s/://")
  # Keep everything up to and including the first type letter.
  trackG=$(printf '%s\n' "$Track" | sed 's/\([SPWDT]\).*/\1/')

  # GET THE TITLE
  # First <h2> line that is not a "Comment" heading, with the tags removed.
  title=$(grep "<h2>" "$fileLoc" | grep -v "Comment" | head -n 1 \
    | sed -e 's/<h2>//' -e 's/<\/h2>//')

  # GET THE AUTHORS
  # Authors are the "href" lines after the first "Speaker" line, up to and
  # including the track line.
  lineAuthors=$(grep -n "Speaker" "$fileLoc" | grep -v "Comment" | head -n 1 \
    | cut -d: -f1)
  if [[ -n $lineTrack && -n $lineAuthors ]] && (( lineTrack > lineAuthors )); then
    authLines=$(( lineTrack - lineAuthors ))
    # 1st perl: drop the opening tag, turn the rest of the markup into ",",
    #           and join all author lines into one.
    # 2nd perl: squeeze whitespace and remove the trailing comma.
    authors=$(head -n "$lineTrack" "$fileLoc" | tail -n "$authLines" | grep href \
      | perl -ne 'chomp; s/<.*?>//; s/<.*/,/; print' \
      | perl -ne 's/\s+/ /g; s/,[ ]*?$//; print')
  fi

  # GET THE ABSTRACT
  # The abstract starts on the line after the first class="body element.
  lineAbstract1=$(grep -n 'class="body' "$fileLoc" | head -n 1 | cut -d: -f1)
  if [[ -n $lineAbstract1 ]] && (( lines > lineAbstract1 )); then
    lineAbstract1r=$(( lines - lineAbstract1 ))   # lines left after the marker
    # The first following line containing "clear" bounds the region; without
    # one, read to the end of the file (the text is cut at the first </div
    # below in either case).
    lineAbstract2_=$(tail -n "$lineAbstract1r" "$fileLoc" | grep -n "clear" \
      | head -n 1 | cut -d: -f1)
    : "${lineAbstract2_:=$lineAbstract1r}"
    # 1st perl: strip leading whitespace, replace each newline with <BR> and
    #           paragraph tags with a space, joining everything into one line.
    # 2nd perl: cut at the first </div, drop the trailing <BR>, squeeze spaces.
    abstract=$(tail -n "$lineAbstract1r" "$fileLoc" | head -n "$lineAbstract2_" \
      | perl -ne 's/^\s+//; s/\n+/<BR>/g; s/<p>|<\/p>/ /g; print' \
      | perl -ne 's/<\/div.*//g; s/\s*<BR>\s*$//; s/\s+/ /g; print')
  fi

  # STORE
  printf '%s\t%s\t%s\t%s\t%s\t%s\n' \
    "$file" "$Track" "$trackG" "$authors" "$title" "$abstract"
}

for fileLoc in ../4??? ../5???; do
  # An unmatched pattern stays literal; skip it (and anything not a file).
  [[ -f $fileLoc ]] || continue
  extract_record "$fileLoc"
done

createWikiFiles.pl

#!/usr/bin/perl -w
# createWikiFiles.pl
# cat Combined4a.tab | ./createWikiFiles.pl > summaryPage
#
# Reads tab-separated paper records and, for each record:
#   * prints a wiki citation line followed by a "----" rule on STDOUT;
#   * writes a page stub named "2009_<CompressedTitle>" in the current
#     directory containing the citation, section headings and the abstract.
# Columns read (0-based): 0 = title, 2 = DOI, 5 = authors (comma separated),
# 6 = abstract.

use strict ;

sub trim($);

while (<>) {
  next unless /\S/ ;   # a blank line is not a record

  my @w = split(/\t/, $_) ;

  # Columns missing from a short row are treated as empty strings.
  my ($title, $doi, $authors, $abstract) =
      map { trim(defined $_ ? $_ : '') } @w[0, 2, 5, 6] ;

  # Page-name key: punctuation -> space, capitalise every word, remove all
  # whitespace, keep the first 26 characters.
  my $comprTitl = $title ;
  $comprTitl =~ s/[\ \.\:\;\-\"\']/ /g ;
  $comprTitl =~ s/(\w+)/\u$1/g ;
  $comprTitl =~ s/\s+//g ;
  $comprTitl = substr($comprTitl, 0, 26) ;

  # Build the display list ("A, B, and C") and collect each last name.
  my @authors = split(',', $authors) ;
  my $authorsItem = '' ;
  my @authorLnames ;
  for my $i (0 .. $#authors) {
    my $author = $authors[$i] ;
    $author =~ s/^ +// ;

    $authorsItem .= " " if $i > 0 ;
    $authorsItem .= $author ;
    $authorsItem .= "," if $i < $#authors ;
    $authorsItem .= " and" if $i + 1 == $#authors ;   # before the last author

    # decompose the name
    my @authorName = split(/\s/, $author) ;
    my $authorLname ;
    if (@authorName == 2) {
      # "First Last"
      $authorLname = $authorName[1] ;
    } elsif ($author =~ /\./) {
      # Name with initials: the last name is whatever follows the last "."
      ($authorLname = $author) =~ s/.*\. *// ;
    } else {
      # Otherwise take the final word.
      $authorLname = @authorName ? $authorName[-1] : '' ;
    }
    push(@authorLnames, $authorLname) ;
  }

  # Create the author abbreviation
  my $firstLname = @authorLnames ? $authorLnames[0] : '' ;
  my $abbrAuthors ;
  if (@authorLnames == 1) {
    $abbrAuthors = "$firstLname" ;
  } elsif (@authorLnames == 2) {
    $abbrAuthors = "$firstLname & $authorLnames[1]" ;
  } else {
    $abbrAuthors = "$firstLname & al" ;
  }

  # The doubled apostrophes around the title are wiki italic markup.
  my $citation = "* ([[2009_$comprTitl|$abbrAuthors, 2009]]) ⇒ $authorsItem\. ([[2009]]). \"''$title\.''\" In: [[Proceedings of the 15th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining]] ([[KDD-2009]]). [http://dx.doi.org/$doi doi:$doi]\n" ;

  print $citation ;
  print "----\n\n" ;

  # NOTE(review): the page name is used as a file name unchanged; a title
  # containing "/" would make the open below fail - confirm the input is clean.
  my $outfile = "2009_$comprTitl" ;
  open(my $out, '>', $outfile) or die "Can't open $outfile : $!" ;
  print $out $citation ;
  print $out "----\n<B>Subject Headings:</B>\n----\n==Notes\n\n\n----\n==Quotes\n\n===Abstract\n" ;
  print $out "* $abstract\n" ;
  print $out "\n\n----\n" ;
  close($out) or die "Can't close $outfile : $!" ;
}

# Perl trim function to remove whitespace from the start and end of the string.
# Takes one scalar and returns a trimmed copy; the argument itself is not
# modified. Whitespace inside the string is left untouched.
sub trim($)
{
	my $string = shift;
	# Delete the leading run and the trailing run of whitespace (replace
	# both alternatives with nothing).
	$string =~ s/^\s+|\s+$//g ;
	return $string;
}


putUpOnWiki.sh

#!/bin/bash
# ./putUpOnWiki.sh
#
# Feeds every file named 2009_* in the current directory to ./wikiput.pl on
# stdin, targeting the wiki URL for a page with the same name as the file.
# Each upload is started in the background and the loop then pauses five
# seconds, so uploads are launched at most once every five seconds without
# waiting for a slow one to finish.

for file in 2009_*; do
  # An unmatched pattern stays literal; skip it (and anything not a file).
  [[ -f $file ]] || continue

  echo "$file"
  dest='http://www.gabormelli.com/rkb_080709.cgi?'$file

  # NOTE(review): -s presumably sets the edit summary - confirm in wikiput.pl.
  ./wikiput.pl -s "putUpOnWiki 090724" "$dest" < "$file" &

  # Must run in the foreground: a backgrounded sleep returns immediately and
  # every upload would be started at the same moment.
  sleep 5

done

# Do not exit while uploads are still running.
wait