par
Raypulsif » 24 nov. 2012, 17:51
bonjour à tous,
mon souci est le suivant : je souhaite récupérer le contenu html d'une page web dont l'accès est protégé par session.
J'ai pensé à cURL pour simuler le login et l'accès à la page, mais je ne récupère qu'une page me disant que je ne suis pas loggué. Je n'arrive pas à trouver l'endroit qui bloque.
Voici mon code :
Code : Tout sélectionner
<?php
/*
Here is a script that is useful to:
- log in through a POST form,
- store the session cookie,
- download a page once logged in.

Both requests reuse the same cURL handle, so the cookies received at login
are kept in memory and sent automatically with the second request.
Any transfer failure is reported with the cURL error instead of being ignored.
*/
// INIT CURL
$ch = curl_init();
if ($ch === false) {
    throw new RuntimeException('Unable to initialise cURL');
}
// SET URL FOR THE POST FORM LOGIN
curl_setopt($ch, CURLOPT_URL, 'https://mywebsite.com/user/login');
// ENABLE HTTP POST
curl_setopt($ch, CURLOPT_POST, 1);
// SET POST PARAMETERS : FORM VALUES FOR EACH FIELD
curl_setopt($ch, CURLOPT_POSTFIELDS, 'name=myname&pass=mypass&form_id=user_login');
// IMITATE CLASSIC BROWSER'S BEHAVIOUR : HANDLE COOKIES
// Setting a jar turns cookie handling on for this handle. The file itself is
// only written when curl_close() is called, not after each request.
curl_setopt($ch, CURLOPT_COOKIEJAR, "/tmp/cookieFileName.txt");
//curl_setopt($ch, CURLOPT_REFERER, 'http://mywebsite.com');
//curl_setopt($ch, CURLOPT_HEADER, TRUE);
//curl_setopt($ch, CURLOPT_USERAGENT, $_SERVER['HTTP_USER_AGENT']);
# Setting CURLOPT_RETURNTRANSFER to 1 makes curl_exec() return the response
# body as a string (or false on failure) instead of printing it and
# returning true/false.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// EXECUTE 1st REQUEST (FORM LOGIN)
$store = curl_exec($ch);
$info = curl_getinfo($ch);
// A false return means the transfer itself failed (no HTTP response at all),
// which is what an http_code of 0 in $info indicates. Stop here and report
// why, rather than sending the second request without a session.
// NOTE(review): with an https:// URL this is often a certificate verification
// problem (missing CA bundle) - confirm with the error message below.
if ($store === false) {
    $errno = curl_errno($ch);
    $error = curl_error($ch);
    curl_close($ch);
    throw new RuntimeException("Login request failed (cURL error $errno): $error");
}
/* The original run already had a problem here, since $info contained :
Array
(
[url] => https://mywebsite.com/user/login
[content_type] =>
[http_code] => 0
[header_size] => 0
[request_size] => 0
[filetime] => -1
[ssl_verify_result] => 0
[redirect_count] => 0
[total_time] => 0
[namelookup_time] => 0
[connect_time] => 0.171
[pretransfer_time] => 0
[size_upload] => 0
[size_download] => 0
[speed_download] => 0
[speed_upload] => 0
[download_content_length] => -1
[upload_content_length] => -1
[starttransfer_time] => 0
[redirect_time] => 0
)
*/
// SET FILE TO DOWNLOAD
// The handle is still in POST mode from the login; switch back to GET so the
// credentials are not re-posted to the file URL.
curl_setopt($ch, CURLOPT_HTTPGET, 1);
curl_setopt($ch, CURLOPT_URL, 'http://mywebsite.com/users/en/myfile/1/');
// Also load any cookies saved by a previous run; the cookies from the login
// above are already held in memory by this handle.
curl_setopt($ch, CURLOPT_COOKIEFILE, "/tmp/cookieFileName.txt");
// EXECUTE 2nd REQUEST (FILE DOWNLOAD)
$content = curl_exec($ch);
if ($content === false) {
    $errno = curl_errno($ch);
    $error = curl_error($ch);
    curl_close($ch);
    throw new RuntimeException("Download request failed (cURL error $errno): $error");
}
// CLOSE CURL (this is also when the cookie jar file gets written)
curl_close($ch);
?>
Le fichier de cookies (cookieFileName.txt) contient :
Code : Tout sélectionner
# Netscape HTTP Cookie File
# http://curl.haxx.se/...ookie_spec.html
# This file was generated by libcurl! Edit at your own risk.
mywebsite.com FALSE / FALSE 0 LOL_TRIB p4epeqgp9tfijl0evi91rsl225
Alors que lorsque je fais une connexion naturelle à la main sur le site, mon navigateur enregistre beaucoup de cookies.
Un peu d'aide svp ?
bonjour à tous,
mon souci est le suivant : je souhaite récupérer le contenu html d'une page web dont l'accès est protégé par session.
J'ai pensé à cURL pour simuler le login et l'accès à la page, mais je ne récupère qu'une page me disant que je ne suis pas loggué. Je n'arrive pas à trouver l'endroit qui bloque.
Voici mon code :
[code]
<?php
/*
Here is a script that is useful to:
- log in through a POST form,
- store the session cookie,
- download a page once logged in.

Both requests reuse the same cURL handle, so the cookies received at login
are kept in memory and sent automatically with the second request.
Any transfer failure is reported with the cURL error instead of being ignored.
*/
// INIT CURL
$ch = curl_init();
if ($ch === false) {
    throw new RuntimeException('Unable to initialise cURL');
}
// SET URL FOR THE POST FORM LOGIN
curl_setopt($ch, CURLOPT_URL, 'https://mywebsite.com/user/login');
// ENABLE HTTP POST
curl_setopt($ch, CURLOPT_POST, 1);
// SET POST PARAMETERS : FORM VALUES FOR EACH FIELD
curl_setopt($ch, CURLOPT_POSTFIELDS, 'name=myname&pass=mypass&form_id=user_login');
// IMITATE CLASSIC BROWSER'S BEHAVIOUR : HANDLE COOKIES
// Setting a jar turns cookie handling on for this handle. The file itself is
// only written when curl_close() is called, not after each request.
curl_setopt($ch, CURLOPT_COOKIEJAR, "/tmp/cookieFileName.txt");
//curl_setopt($ch, CURLOPT_REFERER, 'http://mywebsite.com');
//curl_setopt($ch, CURLOPT_HEADER, TRUE);
//curl_setopt($ch, CURLOPT_USERAGENT, $_SERVER['HTTP_USER_AGENT']);
# Setting CURLOPT_RETURNTRANSFER to 1 makes curl_exec() return the response
# body as a string (or false on failure) instead of printing it and
# returning true/false.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// EXECUTE 1st REQUEST (FORM LOGIN)
$store = curl_exec($ch);
$info = curl_getinfo($ch);
// A false return means the transfer itself failed (no HTTP response at all),
// which is what an http_code of 0 in $info indicates. Stop here and report
// why, rather than sending the second request without a session.
// NOTE(review): with an https:// URL this is often a certificate verification
// problem (missing CA bundle) - confirm with the error message below.
if ($store === false) {
    $errno = curl_errno($ch);
    $error = curl_error($ch);
    curl_close($ch);
    throw new RuntimeException("Login request failed (cURL error $errno): $error");
}
/* The original run already had a problem here, since $info contained :
Array
(
[url] => https://mywebsite.com/user/login
[content_type] =>
[http_code] => 0
[header_size] => 0
[request_size] => 0
[filetime] => -1
[ssl_verify_result] => 0
[redirect_count] => 0
[total_time] => 0
[namelookup_time] => 0
[connect_time] => 0.171
[pretransfer_time] => 0
[size_upload] => 0
[size_download] => 0
[speed_download] => 0
[speed_upload] => 0
[download_content_length] => -1
[upload_content_length] => -1
[starttransfer_time] => 0
[redirect_time] => 0
)
*/
// SET FILE TO DOWNLOAD
// The handle is still in POST mode from the login; switch back to GET so the
// credentials are not re-posted to the file URL.
curl_setopt($ch, CURLOPT_HTTPGET, 1);
curl_setopt($ch, CURLOPT_URL, 'http://mywebsite.com/users/en/myfile/1/');
// Also load any cookies saved by a previous run; the cookies from the login
// above are already held in memory by this handle.
curl_setopt($ch, CURLOPT_COOKIEFILE, "/tmp/cookieFileName.txt");
// EXECUTE 2nd REQUEST (FILE DOWNLOAD)
$content = curl_exec($ch);
if ($content === false) {
    $errno = curl_errno($ch);
    $error = curl_error($ch);
    curl_close($ch);
    throw new RuntimeException("Download request failed (cURL error $errno): $error");
}
// CLOSE CURL (this is also when the cookie jar file gets written)
curl_close($ch);
?>
[/code]
Le fichier de cookies (cookieFileName.txt) contient :
[code]
# Netscape HTTP Cookie File
# http://curl.haxx.se/...ookie_spec.html
# This file was generated by libcurl! Edit at your own risk.
mywebsite.com FALSE / FALSE 0 LOL_TRIB p4epeqgp9tfijl0evi91rsl225
[/code]
Alors que lorsque je fais une connexion naturelle à la main sur le site, mon navigateur enregistre beaucoup de cookies.
Un peu d'aide svp ?