# Overview (review notes):
#   Command-line arguments, in order: namespace, action, limit1, limit2, debug.
#   The script connects to the MySQL database "s_<namespace>" on localhost
#   ("taxon" is mapped to "s_taxonomy") and to sandbox.freebase.com through
#   WWW::Metaweb, then dispatches on the action:
#     sub -> SubjectsList, obj -> ObjectsList, lit -> Literals,
#     res -> Resources,    ns  -> Namespaces,  pre -> Predicates.
#   NOTE(review): the Freebase user name and password are hard-coded below;
#   consider moving them out of the source.
# sesame2freebase.pl # perl sesame2freebase.pl chemical 10000 ns # perl sesame2freebase.pl obo 0 1000 use DBI; use WWW::Metaweb; use strict; my $ns = shift; my $action = shift; my $limit1 = shift; my $limit2 = shift; my $debug = shift; #my $dbh = DBI->connect("DBI:mysql:database=s_$ns;host=localhost", "root", "",{'RaiseError' => 1}); my $u = 'bio2rdf'; my $p = 'secret'; my $queries = ""; my $queriesNb = 0; my $maxRepeat = 1; my $queryMax = 1; my $queryCount = 0; my $mh = WWW::Metaweb->connect( username => $u, password => $p, server => 'sandbox.freebase.com', #server => 'freebase.com', auth_uri => '/api/account/login', read_uri => '/api/service/mqlread', write_uri => '/api/service/mqlwrite', trans_uri => '/api/trans', pretty_json => 1 ); #exit; my $ns1 = $ns; $ns1 = "taxonomy" if ($ns eq "taxon"); my $dbh = DBI->connect("DBI:mysql:database=s_$ns1;host=localhost", "root", "",{'RaiseError' => 1}); SubjectsList($ns, $limit1, $limit2) if ($action eq "sub"); ObjectsList($ns, $limit1, $limit2) if ($action eq "obj"); Literals($ns, $limit1, $limit2) if ($action eq "lit"); Resources($ns, $limit1, $limit2) if ($action eq "res"); Namespaces() if ($action eq "ns"); Predicates($ns, $limit2) if ($action eq "pre"); exit; sub Namespaces { my $query = <prepare($query); $sth->execute(); my $rowCount = $sth->rows; my $count = 0; my( $name); $sth->bind_columns( undef, \$name ); while( $sth->fetch() ) { $count ++; #$name =~ /http:\/\/bio2rdf.org\/(.*):/; if ($name =~ /http:\/\/www.geneontology.org\/go#(.*):/) { $name = lc($1);; print "$count/$rowCount\t###ns###\t$name\n"; NamespaceCreate($name); } if ($name =~ /http:\/\/bio2rdf.org\/(\w*):$/) { $name = lc($1);; print "$count/$rowCount\t###ns###\t$name\n"; NamespaceCreate($name); } } $sth->finish(); } sub Predicates { my $ns = shift; my $limit = shift; my $query = <prepare($query); $sth->execute(); my $rowCount = $sth->rows; my $nb = 0; my( $name, $localname, $count ); $sth->bind_columns( undef, \$name, \$localname, \$count ); LOOP: 
while( $sth->fetch() ) { $nb ++; print "$nb/$rowCount\t###predicate###\t$name\t$localname\t$count\n"; # OBO predicate #goto LOOP if ($localname eq "name"); if ($name =~ /http:\/\/purl.org\/obo\/metadata#/) { #PredicateCreate("obo", $localname, "/type/text"); } if ($name =~ /http:\/\/www.geneontology.org\/dtds\/go.dtd#/) { PredicateCreate("obo", $localname, "/user/bio2rdf/default_domain/obo"); } # bio2rdf predicate if ($name =~ /http:\/\/bio2rdf.org\/(.*)#/) { $ns = $1; } if ($name =~ /http:\/\/dbpedia.org\/property\/(.*)/) { $ns = "dbpedia"; } #goto LOOP if ($name eq "http://bio2rdf.org/bio2rdf#"); goto LOOP if ($name eq "http://purl.org/dc/elements/1.1/"); goto LOOP if ($name eq "http://www.w3.org/1999/02/22-rdf-syntax-ns#"); goto LOOP if ($name eq "http://www.w3.org/2000/01/rdf-schema#"); $ns = "crystaleye" if ($ns eq "iucr"); if ($localname =~ /isEnantiomerOf/) { PredicateCreate($ns, $localname, "/user/bio2rdf/default_domain/bio2rdf"); } elsif ($localname =~ /x(.*)/) { PredicateCreate($ns, $localname, "/user/bio2rdf/default_domain/bio2rdf"); } else { PredicateCreate($ns, $localname, "/type/text"); } } $sth->finish(); } sub SubjectsList { my $ns = shift; my $limit1 = shift; my $limit2 = shift; my $query = <= $limit1 and t.id < $limit2 EOF #and n1.name = 'http://bio2rdf.org/$ns:' my $sth = $dbh->prepare($query); $sth->execute(); my $rowCount = $sth->rows; my $count = 0; my( $name, $localname ); $sth->bind_columns( undef, \$name, \$localname ); while( $sth->fetch() ) { $count ++; print "$count/$rowCount\t###subject###\t$name\t$localname\n"; if ($name =~ /http:\/\/bio2rdf.org\/(.*)#/) { $ns = $1; } if ($name =~ /http:\/\/bio2rdf.org\/(.*):/) { $ns = $1; } if ($name =~ /http:\/\/www.geneontology.org\/go#(.*):/) { $ns = lc($1); } TopicCreate($ns, $localname); #TopicLiterals($name, $localname); #TopicRessources($name, $localname); } $sth->finish(); } sub ObjectsList { my $ns = shift; my $limit1 = shift; my $limit2 = shift; my $query = <= $limit1 and t.id < $limit2 EOF 
#and n1.name <> 'http://bio2rdf.org/$ns:' my $sth = $dbh->prepare($query); $sth->execute(); my $count = 0; #my( $name, $localname, $uri ); #$sth->bind_columns( undef, \$name, \$localname, \$uri ); my( $name, $localname); $sth->bind_columns( undef, \$name, \$localname); my $rowCount = $sth->rows; while( $sth->fetch() ) { $count ++; if ($name =~ /http:\/\/bio2rdf.org\/(.*):/) { $ns = lc($1); } if ($name =~ /http:\/\/www.geneontology.org\/go#(.*):/) { $ns = lc($1); } if ($name =~ /http:\/\/purl.uniprot.org\/taxonomy\//) { $ns = "taxon"; } print "$count/$rowCount\t###object###\t$ns\t$localname\n"; TopicCreate($ns, $localname); } $sth->finish(); } sub Literals { my $ns = shift; my $limit1 = shift; my $limit2 = shift; my $count = 0; my $query = <= $limit1 and t.id < $limit2 ORDER BY t.id EOF #LIMIT $limit #and r1.namespace <> "" #and n1.name = 'http://bio2rdf.org/$ns:' my $sth = $dbh->prepare($query); $sth->execute(); my( $name1, $localname1, $name2, $localname2, $label ); $sth->bind_columns( \$name1, \$localname1, \$name2, \$localname2, \$label ); my $rowCount = $sth->rows; print "$rowCount\n"; LOOP: while( $sth->fetch() ) { #print "######$name1, $localname1, $name2, $localname2, $label\n"; $count ++; $name1 =~ /http:\/\/bio2rdf.org\/(.*):/; my $ns = $1; $name2 =~ /http:\/\/bio2rdf.org\/(.*)#/; my $ns2 = $1; if ($name1 =~ /http:\/\/www.geneontology.org\/go#(.*):/) { $ns = lc($1); } if ($name1 =~ /http:\/\/www.nlm.nih.gov\/mesh\/2006#/) { $ns = "mesh"; } if ($name1 =~ /http:\/\/purl.uniprot.org\/taxonomy\//) { $ns = "taxon"; } if ($name2 =~ /http:\/\/purl.org\/obo\/metadata#/) { $ns2 = "obo"; } if ($name2 =~ /http:\/\/www.geneontology.org\/dtds\/go.dtd#/) { $ns2 = "obo"; } $ns = "crystaleye" if ($ns eq "iucr"); print "$count/$rowCount\t###literal###\t$name1\t$localname1\t$name2\t$localname2\t$label\n"; goto LOOP if ($localname2 eq "identifier"); #goto LOOP if ($localname2 eq "url"); #goto LOOP if ($localname2 eq "urlImage"); goto LOOP if ($localname2 eq "type"); goto 
LOOP if ($localname2 eq "created"); goto LOOP if ($localname2 eq "modified"); goto LOOP if ($localname2 eq "curated"); if ($localname2 eq "name") { $localname1 =~ s/\./_/g; TopicConnect("update", $ns, $localname1, "/type/object/name", "$label [$ns:$localname1]"); TopicConnect("insert", $ns, $localname1, "/user/bio2rdf/default_domain/bio2rdf/title", $label); } elsif ($localname2 eq "prefLabel") { TopicConnect("update", $ns, $localname1, "/type/object/name", "$label [$ns:$localname1]"); TopicConnect("insert", $ns, $localname1, "/user/bio2rdf/default_domain/bio2rdf/title", $label); } elsif ($localname2 eq "title") { TopicConnect("update", $ns, $localname1, "/type/object/name", "$label [$ns:$localname1]"); TopicConnect("insert", $ns, $localname1, "/user/bio2rdf/default_domain/bio2rdf/title", $label); } elsif ($localname2 eq "scientificName") { TopicConnect("update", $ns, $localname1, "/type/object/name", "$label [$ns:$localname1]"); TopicConnect("insert", $ns, $localname1, "/user/bio2rdf/default_domain/bio2rdf/title", $label); } elsif ($localname2 eq "url") { TopicURL("$ns/$localname1", "$label"); } elsif ($localname2 eq "scopeNote" and $ns eq "mesh") { TopicConnect("insert", $ns, $localname1, "/user/bio2rdf/default_domain/bio2rdf/comment", $label); } elsif ($localname2 eq "label" and $ns eq "mesh") { TopicConnect("insert", $ns, $localname1, "/user/bio2rdf/default_domain/bio2rdf/comment", $label); } elsif ($localname2 eq "comments") { TopicConnect("insert", $ns, $localname1, "/user/bio2rdf/default_domain/bio2rdf/comment", $label); } elsif ($localname2 eq "definition") { TopicConnect("insert", $ns, $localname1, "/user/bio2rdf/default_domain/bio2rdf/comment", $label); } elsif ($localname2 eq "comment") { $label =~ s///g; $label =~ s/"//g; TopicConnect("insert", $ns, $localname1, "/user/bio2rdf/default_domain/bio2rdf/comment", $label); } elsif ($localname2 eq "altLabel") { TopicConnect("insert", $ns, $localname1, "/common/topic/alias", $label); } elsif ($localname2 eq 
"otherName") { TopicConnect("insert", $ns, $localname1, "/common/topic/alias", $label); } elsif ($localname2 eq "synonym") { TopicConnect("insert", $ns, $localname1, "/common/topic/alias", $label); } elsif ($localname2 eq "urlPattern") { TopicConnect("insert", $ns, $localname1, "/user/bio2rdf/default_domain/bm/url_example", $label); } elsif ($localname2 eq "namespace") { TopicConnect("insert", $ns, $localname1, "/user/bio2rdf/default_domain/bm/reserved_namespace", $label); } elsif ($localname2 eq "category") { TopicConnect("insert", $ns, $localname1, "/user/bio2rdf/default_domain/bm/category1", $label); } elsif ($localname2 eq "subCategory") { TopicConnect("insert", $ns, $localname1, "/user/bio2rdf/default_domain/bm/category1", $label); } elsif ($localname2 eq "citation") { TopicConnect("insert", $ns, $localname1, "/user/bio2rdf/default_domain/bm/citation", $label); } else { #TopicConnect("insert", $ns, $localname1, "/user/bio2rdf/default_domain/$ns2/".lc($localname2), $label); } } $sth->finish(); } sub Resources { my $ns = shift; my $limit1 = shift; my $limit2 = shift; my $query = <= $limit1 and t.id < $limit2 EOF #and r3.namespace <> "" #and n1.name = 'http://bio2rdf.org/$ns:' #LIMIT $limit #and n1.name = "$ns" #and r1.localname = "$id" my $sth = $dbh->prepare($query); $sth->execute(); my $rowCount = $sth->rows; my( $name1, $localname1, $name2, $localname2, $name3, $localname3); $sth->bind_columns( undef, \$name1, \$localname1, \$name2, \$localname2, \$name3, \$localname3 ); my $count = 0; LOOP: while( $sth->fetch() ) { $count ++; $name2 = "crystaleye" if ($name2 eq "iucr"); if ($name1 =~ /http:\/\/www.nlm.nih.gov\/mesh\/2006#/) { $name1 = "mesh"; $name3 = "mesh"; } if ($name1 =~ /http:\/\/bio2rdf.org\/(.*):/) { $name1 = $1; } if( $name2 =~ /http:\/\/bio2rdf.org\/(.*)#/) { $name2 = $1; } if( $name3 =~ /http:\/\/bio2rdf.org\/(.*):/) { $name3 = $1; } if ($name1 =~ /http:\/\/www.geneontology.org\/go#(.*):/) { $name1 = lc($1); } if ($name3 =~ 
/http:\/\/www.geneontology.org\/go#(.*):/) { $name3 = lc($1); } if ($name2 =~ /http:\/\/purl.org\/obo\/metadata#/) { $name2 = "obo"; } if ($name2 =~ /http:\/\/www.geneontology.org\/dtds\/go.dtd#/) { $name2 = "obo"; } goto LOOP if ($localname2 eq "type"); goto LOOP if ($localname2 eq "creator"); goto LOOP if ($localname2 eq "seeAlso"); print "$count/$rowCount\t###ressource###\t$name1\t$localname1\t$name2\t$localname2\t$name3\t$localname3\n"; # if ($localname2 =~ /x(.*)/) { # $localname2 = $1; # } if ($localname2 eq "related") { TopicLink("$name1/$localname1", "$name3/$localname3", "/user/bio2rdf/default_domain/bio2rdf/seealso"); } if ($localname2 eq "is_a") { TopicLink("$name1/$localname1", "$name3/$localname3", "/user/bio2rdf/default_domain/bio2rdf/isa"); } if ($localname2 eq "isA") { TopicLink("$name1/$localname1", "$name3/$localname3", "/user/bio2rdf/default_domain/bio2rdf/isa"); } if ($localname2 eq "broader") { TopicLink("$name1/$localname1", "$name3/$localname3", "/user/bio2rdf/default_domain/bio2rdf/isa"); } if ($localname2 eq "part_of") { TopicLink("$name1/$localname1", "$name3/$localname3", "/user/bio2rdf/default_domain/bio2rdf/partof"); } if ($localname2 eq "partOf") { TopicLink("$name1/$localname1", "$name3/$localname3", "/user/bio2rdf/default_domain/bio2rdf/partof"); } if ($localname2 eq "dbxref") { $localname2 = "xref"; $name2 = "bio2rdf"; $name3 = $name1; } #TopicLink("$name1/$localname1", "$name3/$localname3", "/user/bio2rdf/default_domain/$name2/".lc($localname2)); } $sth->finish(); } sub NamespaceCreate { my $ns = shift; $ns =~ s/-/_/g; $ns =~ s/ /_/g; $ns = lc($ns); return if (length($ns) == 0); my $query_json = <write($query_json, 'json'); }; if ($@) { print "###ERROR### $@\n"; } print "Time/".localtime(time)."/$i/$result/" . length($result) . 
"\n"; $i ++; } }

# SubmitBUG($query_json)
#
# Queues one write query on the shared WWW::Metaweb handle ($mh), sends the
# write envelope straight away, and prints the JSON result followed by the
# raw response text.
#
#   $query_json - text of the write query to queue.
#
# Dies with $WWW::Metaweb::errstr when send_envelope('write') returns false.
#
# NOTE(review): the "#}" / "#else {" lines below look like the remains of a
# batching branch. With them commented out, every call queues and sends at
# once, so $queryCount goes 0 -> 1 -> 0 on each call and the "< 100" guard is
# always true.
sub SubmitBUG {
    my $query_json = shift;
    #print $query_json;

    if ( $queryCount < 100 ) {
        # Update the file-level counter. The previous "my $queryCount ++"
        # declared a fresh lexical inside this block, so the shared counter
        # tested by the "if" above was never incremented or reset.
        $queryCount++;
        $mh->add_write_query(query_json => $query_json);
        #$mh->add_write_query(query_json1 => $query_json1);
        #$mh->add_write_query(query_json2 => $query_json2);
        print "write_query_count: $queryCount".$mh->query_count("write")."\n";
    #}
    #else {
        $queryCount = 0;
        my $http_was_successful = $mh->send_envelope('write')
            or die $WWW::Metaweb::errstr;
        my $result = $mh->result('write', 'json');
        print "Time/".localtime(time)."/$http_was_successful/".$result . "\n";
        my $text = $mh->raw_result;
        print $text;
    }
}